Example #1
import logging
from itertools import imap  # Python 2; on Python 3 the built-in map is lazy

import numpy as np

# Assumed in scope from the surrounding project: get_first and pandas_to_dicts.


def vwao_from_bars(bars):
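    """Aggregate raw bars into one volume-weighted-average-odds (VWAO) record
    per (market_id, selection_id), shaped like a one-level price ladder."""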
    def format_vwao(d):
        d['back_prices'] = [{'amount': d['total_matched'] / 2.0,
                             'price': d['vwao'],
                             'depth': 1,
                             'type': 'L'}]
        d['lay_prices'] = [{'amount': d['total_matched'] / 2.0,
                            'price': d['vwao'],
                            'depth': 1,
                            'type': 'B'}]
        d['last_price_matched'] = d['vwao']
        del d['vwao']
        return d

    logging.info('Calculating VWAO from bars...')
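    # Notional traded per bar: volume matched times the odds it traded at.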
    bars['notional'] = bars.volume_matched * bars.odds

    agg_dict = {'country': get_first,
                'event': get_first,
                'course': get_first,
                'scheduled_off': get_first,
                'notional': lambda x: float(np.sum(x)),
                'volume_matched': lambda x: float(np.sum(x)),
                'selection': get_first}
    gb = bars.dropna(subset=['selection']).groupby(['market_id', 'selection_id']).aggregate(agg_dict) \
        .rename(columns={'volume_matched': 'total_matched'})
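    # VWAO = total notional / total volume matched, per (market_id, selection_id).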
    gb['vwao'] = gb.notional / gb.total_matched
    del gb['notional']

    return imap(format_vwao, pandas_to_dicts(gb.reset_index()))
Example #2
import logging
import zipfile
from os.path import split, splitext

import dateutil.parser
import pandas as pd
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError

# Assumed project helpers: configure_root_logger, extract_horse_name,
# races_from_bars, training_from_races, vwao_from_bars, convert_types
# and pandas_to_dicts.


def upload(args):
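    """Load one (optionally zipped) csv of bars and upload the derived races,
    training rows and VWAO records to MongoDB."""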
    args, path = args
    parse = lambda x: dateutil.parser.parse(x, dayfirst=True)

    try:
        directory, file_name = split(path)
        file_part, ext = splitext(file_name)

        formatter = logging.Formatter('%(asctime)s - ' + file_name + ' - %(levelname)s: %(message)s')
        configure_root_logger(args.logtty, args.logfile, formatter=formatter)
        db = MongoClient(args.host, args.port)[args.db]

        if ext == '.zip':
            logging.info('Reading zipped csv file into memory')
            fin = zipfile.ZipFile(path, 'r').open(file_part + '.csv')
        else:
            logging.info('Reading csv file into memory')
            fin = path

        bars = pd.read_csv(fin, parse_dates=['SCHEDULED_OFF'], date_parser=parse)
        bars.columns = bars.columns.map(lambda x: x.lower())
        bars = bars.rename(columns={'event_id': 'market_id'})
        for col in ['market_id', 'selection_id']:
            bars[col] = bars[col].map(str)  # Make sure dtype==str

        # Insert other filters here:
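        # 'PE' marks bars matched pre-event, i.e. before the race goes in-play.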
        bars = bars[bars.in_play == 'PE']
        bars['selection'] = bars['selection'].map(extract_horse_name)

        races = races_from_bars(bars).reset_index()
        train = training_from_races(races)
        vwao = vwao_from_bars(bars)

        try:
            db[args.races].insert(pandas_to_dicts(races), continue_on_error=True)
        except DuplicateKeyError as e:
            logging.error('Some duplicate keys in %s; if this is a surprise, ABORT! msg=%s' % (db[args.races], e))

        try:
            db[args.train].insert(convert_types(train, {'n_runners': int}), continue_on_error=True)
        except DuplicateKeyError as e:
            logging.error('Some duplicate keys in %s; if this is a surprise, ABORT! msg=%s' % (db[args.train], e))

        try:
            db[args.vwao].insert(vwao, continue_on_error=True)
        except DuplicateKeyError as e:
            logging.error('Some duplicate keys in %s; if this is a surprise, ABORT! msg=%s' % (db[args.vwao], e))

        logging.info('Successfully uploaded to %s' % db)
    except Exception as e:
        logging.critical(e)
        raise
Example #3
import datetime
import logging
import time
from collections import defaultdict

from pymongo import MongoClient

# Assumed project imports: strategy, HistoricalExecutionService, parse_date,
# configure_root_logger, price_historical_bets, make_scorecard, market_breakdown,
# add_scorecard_id_to_dicts, pandas_to_dicts and the *_COLL collection names.


def run_backtest(context):
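    """Run one backtest (index n_bkt) with model parameters mparams and persist
    the strategy, scorecard, bets and market breakdown to MongoDB."""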
    n_bkt, args, mparams = context

    formatter = logging.Formatter('%(asctime)s - n_bkt=' + str(n_bkt) + ' - %(levelname)s: %(message)s')
    configure_root_logger(args.logtty, args.logfile,
                          MongoClient(args.host, args.port)[args.db][args.logmongo] if args.logmongo is not None
                          else None, formatter=formatter)

    db = MongoClient(args.host, args.port)[args.db]

    where_clause = defaultdict(dict)  # nested filter dicts are created on first access
    country, start_date, end_date = 'GB', parse_date(args.start), parse_date(args.end)
    if start_date is not None:
        where_clause['scheduled_off']['$gte'] = start_date
    if end_date is not None:
        where_clause['scheduled_off']['$lte'] = end_date
    if country is not None:
        where_clause['country'] = country
    sorted_races = db[args.train].find(where_clause, sort=[('scheduled_off', 1)], timeout=False)

    exec_services = HistoricalExecutionService(db)
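    # mu/sigma/beta/tau/draw_probability are TrueSkill-style rating parameters.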
    strat = strategy.Balius(mu=mparams['mu'], sigma=mparams['sigma'], beta=mparams['beta'], tau=mparams['tau'],
                            draw_probability=mparams['draw_prob'], risk_aversion=mparams['risk_aversion'],
                            min_races=mparams['min_races'], max_exposure=mparams['max_exposure'])
    st = time.clock()  # Python 2 era; time.perf_counter() is the modern equivalent
    strategy.backtest(exec_services, strat, sorted_races)
    en = time.clock()
    logging.info('Backtest finished in %.2f seconds' % (en - st))

    strat_dict = strat.to_dict()
    strat_id = db[STRATEGIES_COLL].insert(strat_dict)
    logging.info('Strategy serialised to %s with id=%s' % (db[STRATEGIES_COLL], strat_id))

    bets = price_historical_bets(db, exec_services.get_mu_bets()[0])
    scorecard = make_scorecard(bets)
    now = datetime.datetime.utcnow()
    scorecard.update({'params': {'ts': strat_dict['hm']['ts'], 'risk': strat_dict['risk']},
                      'timestamp': now,
                      'run_seconds': en - st,
                      'strategy_id': strat_id})
    scorecard_id = db[SCORECARDS_COLL].insert(scorecard)
    logging.info('Scorecard inserted in %s with id=%s' % (db[SCORECARDS_COLL], scorecard_id))

    db[BETS_COLL].insert(add_scorecard_id_to_dicts(scorecard_id, bets))
    logging.info('Associated bets inserted in %s' % db[BETS_COLL])

    markets = market_breakdown(bets).reset_index()
    markets = pandas_to_dicts(markets, {'n_runners': int})
    db[MARKETS_COLL].insert(add_scorecard_id_to_dicts(scorecard_id, markets))
    logging.info('Associated market breakdown inserted in %s' % db[MARKETS_COLL])
Example #4
import logging
from os.path import split

import numpy as np
import pandas as pd
from pymongo import MongoClient

# Assumed project helpers: configure_root_logger, parse_horse_name,
# parse_place and pandas_to_dicts.


def upload(args):
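    """Parse a tab-separated race-results file and upload one document per race
    (course, scheduled_off, runner selections, rankings) to MongoDB."""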
    args, path = args

    try:
        _, file_name = split(path)

        formatter = logging.Formatter('%(asctime)s - ' + file_name + ' - %(levelname)s: %(message)s')
        configure_root_logger(formatter=formatter)
        db = MongoClient(args.host, args.port)[args.db]

        logging.info('Reading csv file into memory')
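        # parse_dates=[[0, 1]] fuses the first two columns (race date and time)
        # into a single 'race_date_race_time' datetime column.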
        races = pd.read_csv(path, sep='\t', parse_dates=[[0, 1]], dayfirst=True)
        if len(races) <= 2:
            logging.warning('Too few rows in file; skipping')
            return

        races.rename(columns={'race_date_race_time': 'scheduled_off',
                              'horse_name': 'selection',
                              'place': 'ranking'}, inplace=True)

        races['selection'] = races['selection'].map(parse_horse_name)
        races['ranking'] = races['ranking'].map(parse_place)

        # Collapse runner-level rows into one document per race, keeping only
        # placed runners (ranking >= 0) and shifting rankings to zero-based.
        races = pd.DataFrame([{'course': k[0],
                               'scheduled_off': k[1],
                               'selection': v['selection'][v.ranking >= 0].tolist(),
                               'ranking': (v['ranking'][v.ranking >= 0] - 1).tolist()}
                              for k, v in races.groupby(['track', 'scheduled_off'])])

        # Cast numpy int64 columns to plain ints so they serialise cleanly to BSON.
        int_columns = list(races.dtypes[races.dtypes == np.int64].index)
        type_mappers = dict.fromkeys(int_columns, int)
        db[args.races].insert(pandas_to_dicts(races, type_mappers))

        logging.info('Successfully uploaded to %s' % db)
    except Exception as e:
        logging.critical(e)
        raise