Exemplo n.º 1
0
def avg_ltp_for_winner():
    """Plot the average LTP per time-to-start bucket and report the cheapest bucket.

    Bucket edges run from -180 to 590 in steps of 10 (presumably seconds
    relative to race start, positive meaning "before start" -- TODO confirm
    against MongoManager.get_avg_ltp_by_time_to_start_bucketed).

    The averages are fetched for the last 1000 days for UK/IE races from the
    'price_scrape_tickdata' collection, printed, and plotted against the
    bucket edges.
    """
    start = -3 * 60
    end = 10 * 60
    step = 10
    x = np.arange(start, end, step)

    buckets = x.tolist()

    m = MongoManager(use_remote=True, use_archive=False)
    from_date = datetime.now() - timedelta(days=1000)
    to_date = datetime.now()
    ltp = m.get_avg_ltp_by_time_to_start_bucketed('LTP',
                                                  'avg',
                                                  buckets,
                                                  from_date,
                                                  to_date,
                                                  countrycode=['UK', 'IE'],
                                                  col='price_scrape_tickdata')
    print(ltp)

    LTPs = [v['avg_ltp'] for v in ltp]
    if not LTPs:
        # np.argmin raises on an empty sequence; there is nothing to plot either.
        print("No LTP data returned; nothing to plot.")
        return

    # argmin yields a position in the bucket list, not a time: translate it
    # back to the bucket edge (same alignment the plot below relies on) and
    # convert to minutes before reporting.
    best_bucket = x[int(np.argmin(LTPs))]
    print("Best time to place lays is {} minutes before race start.".format(
        best_bucket / 60.0))
    plt.plot(x, LTPs)
    plt.show()
    def load_enriched_ts(self, from_year=2000, to_year=2100, strategy='back', clip=-200, localhost=True,
                         use_archive=True, countrycode=None):
        """Download the enriched time series and populate the training attributes.

        Sets ``self.strategy``, ``self.X`` (feature matrix), ``self.Y``
        (``act`` plus the payoff column named by ``strategy``),
        ``self.weights_original`` and ``self.cp``.

        :param from_year: first year (1 January 00:00) of the download window
        :param to_year: last year (31 December 23:59) of the download window
        :param strategy: name of the payoff column to use; 'lay' inverts the
            ``winner`` flag for the ``act`` target
        :param clip: lower bound applied to the ``lay`` column (upper bound is 1)
        :param localhost: when True the manager is created with use_remote=False
        :param use_archive: forwarded to MongoManager
        :param countrycode: forwarded to download_enriched_TS
        """
        self.strategy = strategy

        manager = MongoManager(use_remote=not localhost, use_archive=use_archive)

        log.info("Downloading...")
        window_start = datetime(from_year, 1, 1, 0, 0)
        window_end = datetime(to_year, 12, 31, 23, 59)
        df = manager.download_enriched_TS(window_start, window_end, countrycode=countrycode)
        log.info("Loaded bets data: {}".format(len(df)))

        # One row per (market, selection), ordered by race start.
        df = df.drop_duplicates(['marketid', 'selection_id']).sort_values('marketstarttime')
        log.info("After removing duplicates: {}".format(len(df)))

        df['act'] = (1 - df['winner']) if strategy == 'lay' else df['winner']

        feature_columns = [
            'LTP t-0', 'average', 'minimum', 'maximum', 'median', 'std',
            'participants', 'skew', 'kurtosis', 'overrun',
        ]
        self.X = df[feature_columns].values
        self.Y = df[['act', strategy]].values

        # Clipping happens after Y was extracted, so only the weights see it.
        df['lay'] = df['lay'].clip(clip, 1)
        self.weights_original = df[strategy].values  # back or lay payoffs

        self.cp = CustomPayoffs(strategy)
Exemplo n.º 3
0
def collect_results():
    """Fetch results for scraped markets that have none stored yet.

    Market ids present in 'price_scrape' but absent from 'results_scrape'
    are looked up through get_market_result; every market for which a
    winner is reported is inserted into 'results_scrape' (database 'price').
    """
    m = MongoManager()
    scraped_markets = m.mongodb_price['price_scrape'].distinct('marketid')
    stored_results = m.mongodb_price['results_scrape'].distinct('marketid')
    to_get = set(scraped_markets).difference(stored_results)
    log.info("To get {} results".format(len(to_get)))
    if len(to_get) == 0:
        return

    results = {}
    for market_id in to_get:
        results.update(get_market_result(market_id))

    log.info("Got {} results".format(len(results)))

    for market_id, runner_statuses in results.items():
        winner = None
        losers = []
        for selection, status in runner_statuses.items():
            if status == 'WINNER':
                winner = selection
            elif status == 'LOSER':
                losers.append(selection)

        document = {
            'marketid': market_id,
            'timestamp': datetime.datetime.now(),
            'winner': winner,
            'losers': losers,
        }

        # Races that have not completed yet report no winner: skip them.
        if winner is None:
            continue
        m.insert_document('results_scrape', document, database='price')
Exemplo n.º 4
0
def upcoming_races():
    """Render the upcoming races reported by MongoManager as an HTML table."""
    races_frame = pd.DataFrame.from_dict(MongoManager().upcoming_races())
    return render_template('table.html',
                           tables=[races_frame.to_html()],
                           titles=['', 'Upcoming races'])
Exemplo n.º 5
0
 def test_mongo_get_bets0(self):
     """No time series should be returned for a timestamp in February 2014."""
     manager = MongoManager(use_archive=False)
     manager.reconnect(use_archive=False)
     # NOTE(review): the query below goes through self.mongodb, not the
     # manager created above -- confirm this is intended.
     query_time = datetime.datetime.strptime('2014-02-28 16:54:15',
                                             "%Y-%m-%d %H:%M:%S")
     data = self.mongodb.get_ts_for_races_about_to_start(now=query_time)
     self.assertEqual(0, len(data))
Exemplo n.º 6
0
 def test_get_last_LTPs(self):
     """Requesting three prices for a known selection returns three LTPs."""
     manager = MongoManager(use_archive=True)
     manager.reconnect(use_archive=False)
     query_time = datetime.datetime.strptime('2018-02-28 16:54:15',
                                             "%Y-%m-%d %H:%M:%S")
     last_prices = manager.get_last_LTPs(selection_id=16377754,
                                         amount_of_prices=3,
                                         now=query_time)
     self.assertEqual(len(last_prices), 3)
Exemplo n.º 7
0
def find_race_to_bet(now=None, min_before_start=0, countrycodes=None):
    """Return races about to start, reformatted for make_bet_recommendation.

    :param now: reference time; defaults to the current time when falsy
    :param min_before_start: forwarded to get_ts_for_races_about_to_start
    :param countrycodes: country codes to query; defaults to ['GB', 'IE']
    :return: the five values produced by reformat_to_array (races array,
        selection ids, market ids, event ids, race start times), or five
        ``None`` values when no race is available
    """
    # A None default avoids sharing one mutable list between calls.
    if countrycodes is None:
        countrycodes = ['GB', 'IE']

    log.debug("Start downloading races to bet")
    now = dt.datetime.now() if not now else now
    m = MongoManager()
    data = m.get_ts_for_races_about_to_start(now, countrycodes,
                                             min_before_start)

    # Group quotes by race ($marketid) and horse ($selection_id)
    # Fallback to BestBack if LTP is falsy (no traded price)
    races = {}

    for race in data:
        quotes = race['TS']
        race_dict = {}
        horses = sorted({x['Horse'] for x in quotes})
        race_start_time = sorted({x['RaceStartTime'] for x in quotes})
        race_event_id = sorted({x['EventID'] for x in quotes})

        if len(race_start_time) != 1:
            log.warning(
                "Inconsistent start time for race %s! Double check its quotes!"
                % race['_id'])

        race_dict['RaceStartTime'] = race_start_time[0]
        race_dict['EventID'] = race_event_id[0]
        race_dict['SelectionIds'] = horses

        for horse in horses:  # todo: this is inconsistent with training
            ts = pd.Series({
                x['Time']: x['LTP'] if x['LTP'] else x['BestBack']
                for x in quotes if x['Horse'] == horse
            })

            # Check/log NaNs ratio if significant
            try:
                if float(np.count_nonzero(np.isnan(ts)) / len(ts)) >= 0.25:
                    log.warning("NaNs ratio >= 25%% found for TS for MarketID: %s - EventID: %s - SelectionID: %s!" % \
                                (race['_id'], race_dict['EventID'], horse))
            except Exception:
                # np.isnan fails on non-numeric entries and the ratio on an
                # empty series; unlike a bare except this no longer swallows
                # KeyboardInterrupt/SystemExit.
                log.warning("All entries None in  TS for MarketID: %s - EventID: %s - SelectionID: %s!" % \
                            (race['_id'], race_dict['EventID'], horse))

            race_dict[horse] = ts

        races[race['_id']] = race_dict

    if not races:
        log.debug("No races available for betting!")
        return None, None, None, None, None

    races_arr, selection_ids, market_ids, event_ids, race_start_times = reformat_to_array(
        races)

    return races_arr, selection_ids, market_ids, event_ids, race_start_times
Exemplo n.º 8
0
def update_bets_with_settlement_from_betfair():
    """Mark every cleared Betfair order as 'SUCCESS' in the orders store.

    The fees returned alongside the cleared orders are not used here.
    """
    log.info("Update bets with settlements from betfair")
    container = Container()
    settled_df, _fees = container.get_cleared_orders()
    m = MongoManager()
    for _, order in settled_df.iterrows():
        event_id = str(order['event_id'])
        market_id = str(order['market_id'])
        m.update_orders(event_id, order['selection_id'], market_id,
                        'SUCCESS', order['price_matched'],
                        order['price_requested'], order['size_settled'],
                        order['profit'])
Exemplo n.º 9
0
    def __init__(self, source, destination, use_local, use_archive):
        """Log the configuration and open the Mongo connection.

        :param source: name stored as ``self.source_collection``
        :param destination: name stored as ``self.destination_collection``
        :param use_local: when True the manager is created with use_remote=False
        :param use_archive: forwarded to MongoManager and kept as
            ``self.use_archive_db``
        """
        settings = (
            ("source {}", source),
            ("destination {}", destination),
            ("use_local {}", use_local),
            ("use_archive_db {}", use_archive),
        )
        for template, value in settings:
            log.info(template.format(value))

        self.use_archive_db = use_archive
        self.m = MongoManager(use_remote=not use_local, use_archive=use_archive)
        self.source_collection = source
        self.destination_collection = destination
Exemplo n.º 10
0
def update_place_bets_from_orders():
    """Copy the per-race pnl computed from the orders into 'place_bets'.

    Each entry returned by get_pnl_per_race_from_orders updates the
    'bf_profit' field of the 'place_bets' document whose 'market_id'
    equals the entry's '_id'.
    """
    log.info("Update place_bets from orders collection")
    m = MongoManager()

    for pnl_entry in m.get_pnl_per_race_from_orders():
        m.update_document(
            collection_name='place_bets',
            id_name='market_id',
            id_value=pnl_entry['_id'],
            field_name='bf_profit',
            field_value=pnl_entry['bf_profit'],
        )
def query_neural_network_for_bets(X):
    """Run the configured model on X and return its predictions.

    Input X has to be a np array of shape
    [number_of_requested_predictions, elements] with ts starting at t-000.
    Returns the betting recommendation in the form of bet,lay,bet,lay... in
    the same order as X (for 2 bets and 44 horses the shape would be (2,88)),
    together with the model name.

    The model class is looked up in this module by the name configured under
    [Betting] model_class. The process exits if the mean prediction is NaN.
    """
    config = get_config()
    model_class_name = config.get('Betting', 'model_class')
    nn_class = getattr(sys.modules[__name__], model_class_name)
    network = nn_class()
    network.load_model()

    # The manager and the record below are built but not stored by this block.
    _manager = MongoManager()
    record = {'input': X.tolist(), 'timestamp': datetime.now()}
    predicted = network.predict(X)
    record['output'] = predicted.tolist()

    mean_prediction = np.mean(predicted)
    log.info("Mean predicted profit across all bets: {}".format(
        mean_prediction))
    if np.isnan(mean_prediction):
        log.error("Tensorflow output: {}".format(predicted))
        log.error("Terminating thread")
        sys.exit()

    return predicted, network.model_name
Exemplo n.º 12
0
def race_dataframes(max_runners, slice_hours):
    """Rebuild the 'OddsTS2017' collection from the 'Backtesting' races.

    The target collection is dropped first. For every race yielded by
    get_races that has at most ``max_runners`` runners and a market time on
    or after 2017-01-01, the odds of the last ``slice_hours`` hours (index
    reformatted to "HH:MM") are stored together with the winner.

    Races that fail to process are skipped, but the failure is now logged
    instead of being silently discarded.

    :param max_runners: races with more runners than this are ignored
    :param slice_hours: forwarded to ``get_hours_before(hours=...)``
    """
    import logging

    logger = logging.getLogger(__name__)

    m = MongoManager()
    collection = m.mongodb_price.get_collection("Backtesting")
    m.mongodb_price.drop_collection("OddsTS2017")
    odds_collection = m.mongodb_price.get_collection("OddsTS2017")
    cutoff = dt.datetime(2017, 1, 1)

    for _key, race in get_races(collection):
        try:
            if len(race.runners) > max_runners:
                # Too many runners in this race
                continue
            market_time = dt.datetime.strptime(race.marketTime,
                                               "%Y-%m-%dT%H:%M:%S.%fZ")
            if market_time < cutoff:
                continue
            race_odds_df = race.get_hours_before(hours=slice_hours)
            # strftime is available on both pandas Timestamps and plain
            # datetimes; Timestamp.to_datetime() no longer exists in
            # current pandas and made every race fail silently.
            race_odds_df.index = race_odds_df.index.map(
                lambda x: x.strftime("%H:%M"))
            odds_collection.insert_one({
                'ts': race_odds_df.to_dict(),
                'winner': race.get_winner()
            })
        except Exception:
            # Best effort: keep going, but leave a trace of what went wrong.
            logger.warning("Skipping race that could not be processed",
                           exc_info=True)
            continue
Exemplo n.º 13
0
def lay_saftey_check(selection_id, last_price, current_lay_price, best_back):
    """Return the maximum lay price allowed for a selection.

    The result is the smallest of: ``current_lay_price``, the configured
    absolute maximum, ``last_price`` times the configured multiplier and
    ``best_back`` times the configured competitor multiple (all read from
    the [Restrictions] config section).

    The median of the last stored LTPs is computed and logged for
    information only; it does not influence the returned value.

    :param selection_id: selection whose last LTPs are looked up
    :param last_price: last scraped price (LTP or back)
    :param current_lay_price: lay price currently intended
    :param best_back: best back price (best competitor)
    :return: the capped lay price
    """
    m = MongoManager()
    ltps = m.get_last_LTPs(selection_id, amount_of_prices=5)
    ltps.append(last_price)
    log.info("LTPs from db (not used): {}".format(ltps))

    try:
        # cap the value of the list to 500 and clean ltps from any erroneous/non-numerical value
        ltps = [
            min(x, 500) for x in ltps
            if isinstance(x, numbers.Number) and not np.isnan(x)
        ]

        # take the median
        median = np.median(ltps)

    except Exception:
        # Narrower than a bare except: interpreter-exit signals propagate.
        log.warning(
            "Unable to calculate median from ltps: {}. Assigning a safety threshold"
            .format(ltps))
        median = 500

    last_price_multiplier = config.getfloat("Restrictions",
                                            "last_price_multiplier")
    # The option name is spelled "compatitor" in the config file.
    multiple_to_best_competitor = config.getfloat(
        "Restrictions", "multiple_to_best_compatitor")
    absolute_maximum = config.getfloat("Restrictions", "absolute_maximum")

    max_lay = min(current_lay_price, absolute_maximum,
                  last_price * last_price_multiplier,
                  best_back * multiple_to_best_competitor)

    if max_lay < current_lay_price:
        log.warning(
            "Restrict lay from {0} to {1}. | LTPs: {2} | best back: {3}".
            format(current_lay_price, max_lay, ltps, best_back))

    log.info(
        "current_lay_price (back level n for replacing_unfilled): {}".format(
            current_lay_price))
    log.info("LTP median (not used): {}".format(median))
    log.info("Last price (ltp or back): {}".format(last_price))
    log.info("best back (=best competitor): {}".format(best_back))

    return max_lay
Exemplo n.º 14
0
    def save_races(self, col):
        """Flatten every race in ``self.races`` into tick documents and store them.

        For each race with a winner, one document per (timestamp, selection)
        is built from the race's odds dataframe, restricted to ticks less
        than 70 minutes before the market start and to numeric LTPs above 1,
        then inserted into collection ``col``. Races without a winner
        produce no documents.

        :param col: name of the target collection
        """
        import numbers

        m = MongoManager(use_archive=True, use_remote=False)
        for race in tqdm(self.races):
            winner = 'na'
            losers = set()
            for k, v in race.runners.items():
                if v.status == 'LOSER':
                    losers.add(k)
                elif v.status == 'WINNER':
                    winner = k

            # Only races that have a winner are stored, so skip the
            # dataframe work entirely for the others.
            if winner == 'na':
                continue

            df = race.get_odds_dataframe(False)
            # Captured before the metadata columns are added below.
            selection_ids = df.columns
            df['countrycode'] = race.countryCode
            df['marketstarttime'] = race.marketTime
            df['marketid'] = race.marketId
            df['marketstarttime'] = df['marketstarttime'].values.astype('datetime64[s]')

            out = []
            for selection_id in selection_ids:
                df_new = df[[selection_id, 'marketstarttime', 'countrycode', 'marketid']].reset_index()
                df_new.columns = ['timestamp', 'LTP', 'marketstarttime', 'countrycode', 'marketid']
                df_new['timestamp'] = df_new['timestamp'].values.astype('datetime64[s]')
                df_new['selection_id'] = selection_id
                # total_seconds() yields float seconds on every pandas
                # version; astype('timedelta64[s]') stays a timedelta on
                # pandas >= 2 and cannot be compared with an int.
                df_new['seconds_until_start'] = (
                    df_new['marketstarttime'] - df_new['timestamp']).dt.total_seconds()
                df_new['winner'] = bool(selection_id == winner)
                df_new['loser'] = selection_id in losers

                df_new = df_new[df_new['seconds_until_start'] < 70 * 60]

                # Remove NaN, non-numeric and <= 1 LTPs. The original test
                # `type(x['LTP'] == float)` was always truthy because of a
                # misplaced parenthesis.
                out.extend(
                    row for row in df_new.to_dict(orient='records')
                    if isinstance(row['LTP'], numbers.Real) and row['LTP'] > 1)

            if len(out) > 0:
                m.insert_list_of_documents(col, out)
Exemplo n.º 15
0
def propagate_race_results():
    """Write available race results onto the placed bets that lack them."""
    log.info("Propagating race results")
    m = MongoManager()
    pending_market_ids = [race['market_id']
                          for race in m.get_races_without_results()]
    results = m.load_race_results(pending_market_ids)
    log.info("Found {} results".format(len(results)))

    for result in results:
        m.update_placed_bet_with_result(result['marketid'],
                                        result['winner'],
                                        result['losers'])
Exemplo n.º 16
0
def eval_theoratical_pnl(args):
    """Compute the theoretical pnl of placed bets from the stored LTPs.

    For every race returned by get_bets_to_calculate_pnl, each selection's
    back bet (``race['bets'][2*i]``) and lay bet (``race['bets'][2*i + 1]``)
    is valued at the LTP ``race['ltps'][i]``; selections with odds <= 1
    contribute nothing. The per-selection pnl is written to the 'orders'
    collection and the race totals are written once per race.

    :param args: mapping providing '--overwrite_calculated_pnls'
    """
    log.info("Evaluating pnl")
    m = MongoManager()
    races = m.get_bets_to_calculate_pnl(
        overwrite_calculated_pnls=args['--overwrite_calculated_pnls'])
    fees = config.getfloat("Betting", "fees")

    for race in tqdm(races):
        back_pnl = [0]
        lay_pnl = [0]
        losers = race['losers']
        stake = float(race['stake'])
        for i, horse in enumerate(race['selection_ids']):
            won = horse == race['winner']
            loser = horse in losers
            # LTP is used for both sides: back/lay prices would understate
            # the theoretical pnl.
            back_odds = race['ltps'][i]
            lay_odds = race['ltps'][i]

            valid = 1 if lay_odds > 1 else 0  # exclude zero odds from theoretical pnl

            theoretical_pnl = [0]

            if won and race['bets'][i * 2]:  # back on winner
                change = (back_odds * stake - stake) * (1 - fees)
                back_pnl.append(change * valid)
                theoretical_pnl.append(change * valid)
            if won and race['bets'][i * 2 + 1]:  # lay on winner
                change = -(lay_odds * stake - stake)
                lay_pnl.append(change * valid)
                theoretical_pnl.append(change * valid)
            if loser and race['bets'][i * 2]:  # back on loser
                change = -stake  # pylint: disable=E1130
                back_pnl.append(change * valid)
                theoretical_pnl.append(change * valid)
            if loser and race['bets'][i * 2 + 1]:  # lay on loser
                change = stake * (1 - fees)
                lay_pnl.append(change * valid)
                theoretical_pnl.append(change * valid)

            m.update_document2('orders', 'selection', horse, 'market_id',
                               race['market_id'], 'theoretical_pnl',
                               sum(theoretical_pnl))

        # Write the race totals once, after all selections were processed,
        # instead of once per selection with partial sums. Races without
        # selections are left untouched, as before.
        if len(race['selection_ids']) > 0:
            m.update_placed_bet_with_pnl(race['market_id'], sum(back_pnl),
                                         sum(lay_pnl))
Exemplo n.º 17
0
def get_data(countries, extra_columns=True):
    """Load last-minute price scrapes joined with the race outcome.

    Price documents are restricted to the requested countries, to records
    carrying 'max_bookie_price' and to less than 60 seconds until start.
    The result is indexed by (marketid, selection_id) and gets an 'outcome'
    column ('WIN' / 'LOSE', NaN when no result is stored).

    :param countries: country code or iterable of country codes to query;
        when None or empty the previous fixed set GB/IE/US/AU is used
    :param extra_columns: also keep the bookie, matched-volume and
        far/near price fields
    :return: DataFrame indexed by (marketid, selection_id)
    """
    # The parameter used to be ignored in favour of a hard-coded list.
    if countries is None or len(countries) == 0:
        country_filter = ['GB', 'IE', 'US', 'AU']
    elif isinstance(countries, str):
        country_filter = [countries]
    else:
        country_filter = list(countries)

    m = MongoManager()
    results = m.mongodb.price_scrape.find(({
        'countrycode': {
            '$in': country_filter
        },
        'max_bookie_price': {
            '$exists': True
        },
        'seconds_until_start': {
            '$lt': 60
        }
    }))
    # 'selection_id' is part of the index built below, so it must always be
    # kept; it used to be missing when extra_columns was False (KeyError).
    keys = [
        'LTP', 'VWAP', 'back_prices0', 'back_sizes0', 'eventid', 'lay_prices0',
        'lay_sizes0', 'marketid', 'selection_id'
    ]
    if extra_columns:
        keys.extend([
            'max_bookie_price', 'mean_bookie_price', 'median_bookie_price',
            'min_bookie_price', 'total_matched', 'far_price', 'near_price'
        ])
    records = [{k: v for k, v in r.items() if k in keys} for r in results]
    df = pd.DataFrame.from_records(records)

    results = m.mongodb.results_scrape.find(
        {'marketid': {
            '$in': [x for x in df['marketid'].values]
        }})
    race_records = []
    for r in results:
        record = {(r['marketid'], r['winner']): 'WIN'}
        for l in r['losers']:
            record[(r['marketid'], l)] = 'LOSE'
        race_records.append(record)
    race_results = pd.concat([pd.Series(x) for x in race_records])
    race_results.name = 'outcome'
    df = df.set_index(['marketid', 'selection_id'])
    race_results.index.names = df.index.names
    return df.join(race_results)
Exemplo n.º 18
0
def get_classifier_df():
    """Build the classifier dataframe for GB/IE races.

    Price scrapes with less than 60 seconds until start are joined with the
    stored race outcome ('WIN' / 'LOSE'); rows with any missing value are
    dropped and the implied probabilities (1 / price) are added as
    'back_prob', 'lay_prob', 'bookie_prob', 'vwap_prob' and 'ltp_prob'.

    :return: DataFrame indexed by (marketid, selection_id)
    """
    m = MongoManager()
    price_filter = {
        'countrycode': {'$in': ['GB', 'IE']},
        'seconds_until_start': {'$lt': 60},
    }
    wanted = [
        'LTP', 'VWAP', 'back_prices0', 'back_sizes0', 'eventid', 'lay_prices0',
        'lay_sizes0', 'marketid', 'max_bookie_price', 'mean_bookie_price',
        'median_bookie_price', 'min_bookie_price', 'selection_id',
        'total_matched'
    ]
    records = [
        {field: value for field, value in doc.items() if field in wanted}
        for doc in m.mongodb.price_scrape.find(price_filter)
    ]
    df = pd.DataFrame.from_records(records)

    market_ids = list(df['marketid'].values)
    outcome_docs = m.mongodb.results_scrape.find(
        {'marketid': {'$in': market_ids}})
    race_records = []
    for doc in outcome_docs:
        outcome = {(doc['marketid'], doc['winner']): 'WIN'}
        outcome.update(
            {(doc['marketid'], loser): 'LOSE' for loser in doc['losers']})
        race_records.append(outcome)

    race_results = pd.concat([pd.Series(rec) for rec in race_records])
    race_results.name = 'outcome'
    df = df.set_index(['marketid', 'selection_id'])
    race_results.index.names = df.index.names
    df = df.join(race_results).dropna()

    # Implied probability of each price column.
    probability_columns = (
        ('back_prob', 'back_prices0'),
        ('lay_prob', 'lay_prices0'),
        ('bookie_prob', 'median_bookie_price'),
        ('vwap_prob', 'VWAP'),
        ('ltp_prob', 'LTP'),
    )
    for prob_col, price_col in probability_columns:
        df[prob_col] = 1 / df[price_col]
    return df
Exemplo n.º 19
0
    def get_races(self):
        """Load every market in ``self.markets`` from the tar file into Mongo.

        Each (market, event) pair is read from ``self.tar_file`` at the path
        ``self.prefix.format(event, market)``, bz2-decompressed, parsed with
        BetFairRace.create_from_json and inserted into the 'Backtesting'
        collection. Archive members that cannot be read are skipped; failed
        inserts are reported on the progress bar and skipped.

        :return: an empty DataFrame when no markets are set, otherwise None
        """
        manager = MongoManager()
        collection = manager.mongodb_price.get_collection("Backtesting")
        if self.markets.empty:
            print(
                "No markets set, have they been processed? Call .get_runners_and_markets"
            )
            return pd.DataFrame()

        # Context managers close both the progress bar and the archive, even
        # when parsing raises. Loop names no longer shadow the manager or
        # get rebound by the except clause.
        with tqdm(total=len(self.markets)) as pbar, \
                tarfile.open(self.tar_file, 'r') as tf:
            for market_id, event_id in self.markets:
                pbar.update()
                try:
                    data = bz2.decompress(
                        tf.extractfile(
                            self.prefix.format(event_id, market_id)).read())
                except (ValueError, KeyError):
                    continue
                bfr = BetFairRace.create_from_json(data.decode('utf8'))
                try:
                    collection.insert_one({**bfr.to_dict(),
                                           'marketId': market_id})
                except Exception as exc:
                    # Best effort: report and carry on with the next market.
                    pbar.write("Could not store market {}: {}".format(
                        market_id, exc))
Exemplo n.º 20
0
def propagate_race_results_to_price_scrape(col='price_scrape'):
    """Add winner and loser fields to the price scrape collection.

    Looks at races of the last 30 days that have no result yet, loads the
    results chunk by chunk and writes them back to ``col``.

    :param col: name of the collection to update
    """
    m = MongoManager()
    from_date = datetime.now() - timedelta(days=30)
    log.info("Getting prices without race results since {}".format(from_date))
    races = m.get_price_scrape_without_results(from_date, col)
    race_count = len(races)
    log.info("Got {} races. Loading results and updating {} collection".format(
        race_count, col))
    if race_count < 1:
        log.info("No races to process")
        return

    chunk_arg = int(np.ceil(race_count / 100))
    for race_chunk in tqdm(chunk(races, chunk_arg)):
        results = m.load_race_results(race_chunk)

        if len(results) >= 1:
            for result in results:
                m.update_price_scrape_with_result(col, result['marketid'],
                                                  result['winner'],
                                                  result['losers'])
        else:
            log.info("No results to process")
Exemplo n.º 21
0
    def update_placed_orders(self, armed=False):
        """Re-price all open orders from the current market prices.

        For every open order the price level configured for the current
        minute-to-start ([Betting] use_level, comma separated) selects either
        the last traded price (level 0) or the n-th best back price. The
        price is then capped by lay_saftey_check and passed through
        price_adjustment.

        Orders are ignored when the seconds to start are unknown/zero or
        the race started longer ago than [Betting]
        update_until_secs_after_start, and when no back price is available.
        The whole update is aborted when no level is configured for the
        current minute.

        :param armed: when True the new prices are stored and the orders are
            replaced on the exchange; otherwise they are only logged
        """
        config = get_config()
        m = MongoManager()

        open_orders = self.container.get_open_orders()

        use_level = config.get("Betting", "use_level").split(',')
        if len(open_orders) > 0:
            log.info("Currently open order: {}".format(len(open_orders)))
        for market, orders in open_orders.items():
            log.info("Updating order for market: {}".format(market))
            new_prices = self.container.get_single_market(market)
            updated_prices = []
            updated_betids = []
            for o in orders:
                selection_id = o.selection_id
                secs_to_start = m.get_secs_to_start(market)
                log.info("Seconds to start: {}".format(secs_to_start))
                if not secs_to_start or secs_to_start < \
                        -config.getint("Betting", "update_until_secs_after_start"):  # pylint: disable=E1130
                    log.info("Ignoring order\n----------------")
                    continue
                log.info("Selection ID: {}".format(selection_id))
                # Defaults used when the selection is not among the runners.
                ltp_price = 0
                lays = [0, 0, 0]
                backs = [0, 0, 0]
                for r in new_prices[0].runners:
                    if r.selection_id != selection_id:
                        continue
                    backs = [level.price for level in r.ex.available_to_back]
                    lays = [level.price for level in r.ex.available_to_lay]
                    ltp_price = r.last_price_traded
                log.info("New backs: {}".format(backs))
                log.info("New lays: {}".format(lays))
                min_to_start = math.floor(secs_to_start / 60)
                log.info("Minutes until start: {}".format(min_to_start))
                try:
                    level_at_minutes_to_start = int(use_level[min_to_start])
                    log.info(
                        "Using Level: {}".format(level_at_minutes_to_start))
                except IndexError:
                    log.info(
                        "use_level not set for {} minutes to start. Aborting".
                        format(min_to_start))
                    return

                if not backs:
                    # Without any back price neither the level lookup nor
                    # the best-back safety check can work (used to raise
                    # IndexError on backs[0]).
                    log.info("No back prices available. Ignoring order\n----------------")
                    continue

                # Clamp to the deepest available level. The previous bound
                # of len(backs) + 1 allowed backs[len(backs)], which is
                # always out of range.
                level_at_minutes_to_start = min(level_at_minutes_to_start,
                                                len(backs))
                log.info(
                    "Final level to use: {}".format(level_at_minutes_to_start))
                new_price = ltp_price if level_at_minutes_to_start == 0 else backs[
                    level_at_minutes_to_start - 1]

                log.info("Updating to new price: {}".format(new_price))
                new_price = lay_saftey_check(selection_id=o.selection_id,
                                             last_price=ltp_price,
                                             current_lay_price=new_price,
                                             best_back=backs[0])
                new_price = price_adjustment(new_price)
                updated_betids.append(o.bet_id)
                updated_prices.append(new_price)
                if armed:
                    m.update_order_price(o.market_id, o.selection_id, o.bet_id,
                                         min_to_start, new_price)

            if len(updated_betids) > 0:
                log.info("Updated bet_ids: {}".format(updated_betids))
                log.info("Updated prices: {}".format(updated_prices))
                if armed:
                    self.container.replace_orders(market, updated_betids,
                                                  updated_prices)
Exemplo n.º 22
0
 def setUp(cls):
     """Create the archive-backed MongoManager used by the tests."""
     manager = MongoManager(use_archive=True)
     cls.mongodb = manager
Exemplo n.º 23
0
# Rows on or after the cutoff date form the test set.
h2h_test = h2h[h2h['RACEDATE'] >= cutoff_date]

# Reduced Dataset without track-specific details (to be incorporated later)
h2h_red = h2h.drop(track_cols, axis=1)
h2h_train_red = h2h_train.drop(track_cols, axis=1)
h2h_test_red = h2h_test.drop(track_cols, axis=1)

# Save data as json.
# json.dump writes str, so the files must be opened in text mode ('wb' raises
# TypeError on Python 3); the with-block also closes each file.
# NOTE(review): to_json already returns a JSON string, so the files contain a
# JSON-encoded string; kept as is for compatibility with existing readers.
for _name, _frame in (('h2h', h2h),
                      ('h2h_train', h2h_train),
                      ('h2h_test', h2h_test),
                      ('h2h_red', h2h_red),
                      ('h2h_train_red', h2h_train_red),
                      ('h2h_test_red', h2h_test_red)):
    with open(path + _name + '.json', 'w') as _fh:
        json.dump(_frame.to_json(orient='index'), _fh)

# Upload data into MongoDB
client = MongoManager()
client.upload_dataframe(h2h, 'h2h')
client.upload_dataframe(h2h_red, 'h2h_red')

#----------#
# ANALYSIS #
#----------#
training_set = h2h_train_red.values
test_set = h2h_test_red.values

# NOW LET'S HAVE FUN!
Exemplo n.º 24
0
class DEBase():
    """Enrich raw scraped prices market by market and upload the result.

    ``map_reduce`` drives the pipeline: it downloads the raw price rows of the
    markets that are not yet in the destination collection, pivots them to one
    row per runner, adds per-market statistics and back/lay payoffs and
    uploads the formatted frame.
    """

    def __init__(self, source, destination, use_local, use_archive):
        """Store the collection names and open the MongoManager.

        :param source: name of the collection holding the raw prices.
        :param destination: name of the collection receiving enriched rows.
        :param use_local: when true the manager is created with
            ``use_remote=False``.
        :param use_archive: forwarded to MongoManager; also selects the
            'archive' database (instead of 'price') for the upload.
        """
        log.info("source {}".format(source))
        log.info("destination {}".format(destination))
        log.info("use_local {}".format(use_local))
        log.info("use_archive_db {}".format(use_archive))

        self.use_archive_db = use_archive
        self.m = MongoManager(use_remote=not use_local, use_archive=use_archive)
        self.source_collection = source
        self.destination_collection = destination

    def map_reduce(self, has_bookie=False):
        """Enrich and upload every market missing from the destination.

        :param has_bookie: forwarded as ``must_have_bookie`` / ``has_bookie``
            to the source-collection queries.
        """
        # check for latest entry in historical database
        # self.last_datetime = self.m.get_latest_date_in_enriched_prices(self.destination_collection)
        # if not self.last_datetime:
        self.last_datetime = dt.datetime(2018, 1, 1)

        log.info("Get available marketids")
        new_marketids = self.m.get_distinct('marketid',
                                            self.source_collection,
                                            self.last_datetime,
                                            must_have_bookie=has_bookie)
        log.info("got {}".format(len(new_marketids)))

        log.info("Get marketids already in enriched database")
        already_available_markeids = self.m.get_distinct('marketid',
                                                         self.destination_collection,
                                                         self.last_datetime,
                                                         must_have_bookie=False)
        log.info("got {}".format(len(already_available_markeids)))

        marketids_to_collect = list(new_marketids - already_available_markeids)

        log.info("Start download from date: {}".format(self.last_datetime))
        log.info("Download new marketids to enrich in chunks. Total marketids to process: {}".format(len(marketids_to_collect)))

        # set to higher value if larger amount needs to be processed simultaneously, but if keys
        # are missing at certain time periods (such as mean_bookie_price t-0) the whole batch will be ignored
        batchsize = 1

        c = int(np.ceil(len(marketids_to_collect) / batchsize))

        # The target database does not change between batches.
        db = 'archive' if self.use_archive_db else 'price'

        for marketids in tqdm(chunk(marketids_to_collect, c)):
            self.historicals = self.m.download_raw_prices(self.last_datetime,
                                                          marketids,
                                                          self.source_collection,
                                                          max_rows=999999999,
                                                          has_bookie=has_bookie)

            self.df = pd.DataFrame.from_dict(self.historicals)
            del self.historicals
            row_count = len(self.df)
            if row_count == 0:
                # No new races matching the criteria in the current chunk.
                continue
            log.info("Total bets enriching: {}".format(row_count))

            try:
                self.unstack_timeseries_to_closest_minutes()
            except ValueError as err:
                # The batch cannot be pivoted; report it and move on.
                log.warning("Unstacking failed, ignoring the whole batch: {}".format(err))
                continue

            try:
                self.calculate_metrics()
            except KeyError:
                log.warning("Key Error.  Ignoring the whole batch. Make sure batch size is 1.")
                continue

            self.calculate_payoffs()
            self.output_formatting()

            self.df['timestamp'] = dt.datetime.now()
            self.m.upload_dataframe(self.df, self.destination_collection, db)

    def unstack_timeseries_to_closest_minutes(self, fields_to_stack=('LTP',)):
        """Pivot ``self.df`` so each minute bucket becomes its own column.

        Every row gets a label ``'t-<n>'`` where ``n`` is
        ``floor(seconds_until_start / 60)``; the frame is then unstacked on
        that label, producing columns such as ``'LTP t-0'``.

        :param fields_to_stack: value columns to pivot. The argument is copied,
            never modified, so the default is not shared between calls.
        :raises ValueError: propagated from pandas when the frame cannot be
            sorted or unstacked.
        """
        self.df['minutes_to_start'] = 't-' + np.floor(self.df['seconds_until_start'] / 60).astype('int').astype('str')
        columns = list(fields_to_stack) + ['countrycode', 'loser', 'marketid', 'marketstarttime',
                                           'selection_id', 'winner',
                                           'minutes_to_start']
        self.df = self.df[columns]
        self.df = self.df.sort_values('minutes_to_start', ascending=False)
        self.df = self.df.drop_duplicates(['selection_id', 'marketid', 'minutes_to_start'], keep='last')
        self.df = self.df.set_index(['marketid', 'selection_id', 'countrycode', 'loser', 'marketstarttime',
                                     'winner', 'minutes_to_start'])
        # Level 6 is 'minutes_to_start'.
        self.df = self.df.unstack(level=6).reset_index()
        # Flatten the (field, bucket) column tuples into 'field bucket' names.
        self.df.columns = [' '.join(col).strip() for col in self.df.columns.values]

    @staticmethod
    def _kurtosis(values):
        """Return the kurtosis of one group of values."""
        return values.kurtosis()

    @staticmethod
    def _per_market(values, reducer, label):
        """Reduce ``values`` per index level 0 into a ['marketid', label] frame."""
        # groupby(level=0, sort=False) is the replacement for the `level=`
        # keyword of Series reductions, which pandas 2.0 removed.
        frame = values.groupby(level=0, sort=False).agg(reducer).to_frame().reset_index()
        frame.columns = ['marketid', label]
        return frame

    def calculate_metrics(self):
        """Add per-market statistics of the prices to ``self.df``.

        The "1d" metrics summarise the 'LTP t-0' price across the runners of a
        market; the "2d" metrics summarise all 'LTP t-0' .. 'LTP t-59' columns
        of all runners of a market.

        :raises KeyError: when one of the required 'LTP t-<n>' columns is
            missing.
        """
        self.df['LTP'] = self.df['LTP t-0']
        self.df['horse'] = self.df.groupby('marketid').cumcount()  # add horse number in each group
        self.df_indexed = self.df.set_index(['marketid', 'horse'])
        ltp = self.df_indexed['LTP']

        log.info("Calculating 1d metrics")
        metrics_1d = [
            self._per_market(ltp, 'mean', 'average'),
            self._per_market(ltp, 'min', 'minimum'),
            self._per_market(ltp, 'max', 'maximum'),
            self._per_market(ltp, 'median', 'median'),
            self._per_market(ltp, 'std', 'std'),
            self._per_market(self.df_indexed['winner'], 'count', 'participants'),
            self._per_market(ltp, 'skew', 'skew'),
            self._per_market(ltp, self._kurtosis, 'kurtosis'),
            # Sum of inverse prices per market.
            self._per_market(1 / ltp, 'sum', 'overrun'),
        ]

        # stat metrics of historical elements
        log.info("Calculating 2d metrics")
        all_price_headings = ['LTP t-' + str(i) for i in range(60)]
        # Stack once; every 2d metric reduces the same long series.
        stacked = self.df_indexed[all_price_headings].stack()
        metrics_2d = []
        for label, reducer in (('average_2d', 'mean'),
                               ('min_2d', 'min'),
                               ('max_2d', 'max'),
                               ('median_2d', 'median'),
                               ('std_2d', 'std'),
                               ('skew_2d', 'skew'),
                               ('kurtosis_2d', self._kurtosis)):
            log.info("Calculating {}".format(label))
            metrics_2d.append(self._per_market(stacked, reducer, label))

        log.info("Merging 1d metrics")
        for frame in metrics_1d:
            self.df = self.df.merge(frame, on='marketid')

        log.info("Merging 2d metrics")
        for frame in metrics_2d:
            self.df = self.df.merge(frame, on='marketid')

    def calculate_payoffs(self):
        """Add the 'back', 'lay' and 'lay_risk' payoff columns to ``self.df``.

        Missing values are first replaced by 0. Payoffs use a unit stake and
        the ``fees`` value of the "Betting" config section; rows whose
        ``winner`` equals -1 get a payoff of 0.
        """
        self.df = self.df.fillna(0)
        config = get_config()
        fees = config.getfloat("Betting", "fees")
        stake = 1  # hard coded for training for comparison purposes

        winner = self.df['winner']
        ltp = self.df['LTP'].values

        payoff_back = np.where(winner.values,
                               (stake * ltp - stake) * (1 - fees),  # back winner
                               -np.ones(ltp.shape) * stake)  # back loser
        payoff_back = np.where(winner != -1, payoff_back, 0)  # replace na with 0 payoff

        # payoff when lay: -odds when win, 1 when loss
        payoff_lay = np.where(winner.values,
                              -(stake * ltp - stake),  # lay winner
                              stake * np.ones(ltp.shape) * (1 - fees))  # lay loser
        payoff_lay = np.where(winner != -1, payoff_lay, 0)  # replace na with 0 payoff

        # keep the lay odds to analyse the potential risk on the lays we bet on
        winning_lay_risk = np.minimum((ltp - 1) * (-1), np.zeros(payoff_lay.shape))

        self.df['back'] = payoff_back
        self.df['lay'] = payoff_lay
        self.df['lay_risk'] = winning_lay_risk

    def output_formatting(self):
        """Keep only the columns to upload, one row per (marketid, selection_id)."""
        self.df = self.df[[
            'marketstarttime', 'countrycode', 'marketid', 'selection_id',

            'LTP t-0', 'LTP t-7', 'average', 'minimum', 'maximum', 'median', 'std', 'participants', 'skew', 'kurtosis',
            'average_2d', 'min_2d', 'max_2d', 'median_2d', 'std_2d', 'skew_2d', 'kurtosis_2d',
            'overrun',

            'winner', 'back', 'lay', 'lay_risk'
        ]]
        self.df = self.df.drop_duplicates(['marketid', 'selection_id'])
Exemplo n.º 25
0
def de_duplicate():
    """Write a de-duplicated copy of the "price_scrape_enriched2" collection.

    The collection is loaded as a dataframe, reduced to one row per
    (marketid, selection_id) pair and uploaded as
    "price_scrape_enriched_deduplicated".
    """
    manager = MongoManager(use_remote=False, use_archive=True)
    key_columns = ['marketid', 'selection_id']
    unique_rows = manager.get_dataframe("price_scrape_enriched2").drop_duplicates(key_columns)
    manager.upload_dataframe(unique_rows, "price_scrape_enriched_deduplicated")
Exemplo n.º 26
0
def _ladder_value(ladder, depth, field):
    """Return ``ladder[depth].<field>``, or None when the ladder is shorter."""
    try:
        return getattr(ladder[depth], field)
    except IndexError:
        return None


def collect_prices(collection_name='price_scrape', single_marketid=False):
    """Scrape the current prices of every runner and store one document each.

    For every runner of every scraped Betfair market book a document is built
    with the three best back/lay levels, the last traded price, the VWAP and
    the SP data, optionally enriched with turf bookmaker odds and the matching
    Matchbook prices, and inserted into ``collection_name``.

    :param collection_name: collection receiving the documents.
    :param single_marketid: when truthy only this market is scraped and
        Matchbook is not queried.
    :return: None. Returns early when the Betfair login fails or when there
        are no markets to scrape.
    """
    from horse_racing.betfair_manager.engine import Container as BFContainer
    from horse_racing.matchbook_manager.engine import Container as MBContainer

    try:
        bfm = BFContainer()
    except Exception:
        # Without a Betfair session nothing below can work.
        log.error("Unable to log into betfair")
        return

    mbm = None
    if not single_marketid:
        try:
            mbm = MBContainer()
        except Exception:
            log.error("Unable to log into matchbook")

    if single_marketid:
        events, markets = bfm.get_single_race(single_marketid)
    else:
        events, markets = bfm.get_all_races()

    try:
        prices = bfm.update_markets(events, markets)
    except AttributeError:
        log.info("No markets to scrape")
        return

    mb_prices = {}
    if not single_marketid:
        try:
            # Also fails (and is reported) when the Matchbook login failed.
            mb_prices = mbm.get_races()
        except Exception:
            mb_prices = {}
            log.error("failed to get mb prices")

    # market id -> (event id, start time, country code, matchbook market)
    event_mapping = {}
    bf_selection_name_mapping = {}
    for event_key, market_entries in markets.items():
        for market_id, market in market_entries:
            mb_market = None
            try:
                mb_market = mb_prices[(
                    market.event.venue,
                    market.market_start_time.strftime("%Y-%m-%dT%H:%M:00.000Z"))]
            except KeyError:
                log.info("Didn't find matchbook market for {}@{}".format(
                    market.market_start_time, market.event.venue))
            event_mapping[market_id] = (event_key, market.market_start_time,
                                        market.event.country_code, mb_market)
            # NOTE(review): runners are read by subscript here but as the
            # attribute `.runners` further down -- confirm both are supported.
            bf_selection_name_mapping.update(
                {r.selection_id: r.runner_name
                 for r in market['runners']})

    # Loop invariants: one config read and one database manager for the run.
    turf_countries = get_config().get('Scraping', 'turf_countries').split()
    mongo = MongoManager()

    for p in tqdm(prices):
        event_id, market_start_time, country_code, mb_market = event_mapping[p.market_id]

        # find turf bookmaker odds
        turf_odds_df = pd.DataFrame()
        race_status = {}
        for m_id, market in markets[event_id]:
            if m_id != p.market_id:
                continue
            venue = market.event.venue.lower()
            if market.event.country_code not in turf_countries:
                continue
            # TODO, check and process turf odds
            try:
                turf_odds_df, race_status = get_race_odds(
                    venue, market_start_time, market.runners)
            except Exception as err:
                # Turf odds are optional; keep scraping without them.
                log.warning("Failed to get turf odds for {}@{}: {}".format(
                    market_start_time, venue, err))

        for r in p.runners:
            backs = r.ex.available_to_back
            lays = r.ex.available_to_lay
            back_prices = [_ladder_value(backs, i, 'price') for i in range(3)]
            back_sizes = [_ladder_value(backs, i, 'size') for i in range(3)]
            lay_prices = [_ladder_value(lays, i, 'price') for i in range(3)]
            lay_sizes = [_ladder_value(lays, i, 'size') for i in range(3)]

            try:
                vwap = get_VWAP(r.ex.traded_volume)
            except ZeroDivisionError:
                vwap = 0.

            try:
                # NOTE(review): betfairlightweight appears to name this
                # attribute `actual_sp`; if so this lookup raises and the
                # whole SP dict is dropped -- verify against the library.
                sp_dict = {
                    'near_price': r.sp.near_price,
                    'far_price': r.sp.far_price,
                    'actual_SP': r.sp.actual_SP,
                    'back_SP_amounts':
                    [(a.price, a.size) for a in r.sp.back_stake_taken],
                    # Lay side comes from lay_liability_taken, the back side
                    # from back_stake_taken.
                    'lay_SP_amounts':
                    [(a.price, a.size) for a in r.sp.lay_liability_taken]
                }
            except Exception:
                sp_dict = {}

            now = datetime.datetime.now()
            seconds_until_start = (market_start_time - now).total_seconds()
            d = {
                'marketid': p.market_id,
                'selection_id': r.selection_id,
                'LTP': r.last_price_traded,
                'back_sizes0': back_sizes[0],
                'back_prices0': back_prices[0],
                'back_sizes1': back_sizes[1],
                'back_prices1': back_prices[1],
                'back_sizes2': back_sizes[2],
                'back_prices2': back_prices[2],
                'lay_sizes0': lay_sizes[0],
                'lay_prices0': lay_prices[0],
                'lay_sizes1': lay_sizes[1],
                'lay_prices1': lay_prices[1],
                'lay_sizes2': lay_sizes[2],
                'lay_prices2': lay_prices[2],
                'timestamp': now,
                'eventid': event_id,
                'marketstarttime': market_start_time,
                'countrycode': country_code,
                'seconds_until_start': seconds_until_start,
                'total_matched': r.total_matched,
                'VWAP': vwap,
            }
            d.update(sp_dict)

            if not turf_odds_df.empty:
                # Add the turf odds
                try:
                    runner_odds = turf_odds_df.loc[r.selection_id]
                    bookie_prices = runner_odds.values.astype(float)
                    bookie_prices[bookie_prices == 0] = np.nan  # replace 0 with nan

                    d['bookies'] = pd.DataFrame({
                        'name': runner_odds.index,
                        'price': bookie_prices
                    }).to_dict(orient='records')
                    d['mean_bookie_price'] = np.nanmean(bookie_prices)
                    d['median_bookie_price'] = np.nanmedian(bookie_prices)
                    d['min_bookie_price'] = np.nanmin(bookie_prices)
                    d['max_bookie_price'] = np.nanmax(bookie_prices)
                except KeyError:
                    log.info("No turf odds for selection: {}".format(
                        r.selection_id))

            if mb_market:
                d['mb_market_id'] = mb_market['id']
                runner_name = bf_selection_name_mapping[r.selection_id]
                mb_selection_id = None
                for mr in mb_market['runners']:
                    # Matchbook names are compared after stripping leading
                    # digits, dashes and spaces.
                    if mr['name'].lstrip('1234567890- ') != runner_name:
                        continue
                    mb_selection_id = mr['id']
                    d['mb_selection_id'] = mb_selection_id
                    d['mb_volume'] = mr['volume']
                    # Back and lay levels are numbered independently.
                    back_index, lay_index = 0, 0
                    for mp in mr['prices']:
                        if mp['side'] == 'back':
                            d['mb_back_prices{}'.format(
                                back_index)] = mp['decimal-odds']
                            d['mb_back_sizes{}'.format(
                                back_index)] = mp['available-amount']
                            back_index += 1
                        else:
                            d['mb_lay_prices{}'.format(
                                lay_index)] = mp['decimal-odds']
                            d['mb_lay_sizes{}'.format(
                                lay_index)] = mp['available-amount']
                            lay_index += 1
                if not mb_selection_id:
                    log.warning(
                        "Didn't find runner information for bf selection id {}"
                        .format(r.selection_id))

            d.update(race_status)
            log.debug(d)
            success = mongo.insert_document(collection_name, d)
            log.debug(success)
Exemplo n.º 27
0
def place_bets(bets,
               selection_ids,
               prices,
               market_ids,
               event_id,
               race_start_time,
               container,
               armed=False,
               model_name=None,
               reference_price_for_initial_bet=0):
    """Record the requested bets and, when armed, place them as limit orders.

    ``bets`` holds two flags per horse: even positions are BACK bets, odd
    positions are LAY bets, both for horse ``position // 2``. The request is
    always stored in the 'place_bets' collection of the 'pnl' database; orders
    are only sent through ``container`` when ``armed`` is true.

    :param bets: bet flags, truncated to ``2 * len(selection_ids)`` entries.
    :param selection_ids: one selection id per horse.
    :param prices: 7-tuple ``(back_prices, lay_prices, back1, lay1, back2,
        lay2, ltp_prices)``, each indexable per horse.
    :param market_ids: market id passed to the order calls and stored with
        the documents.
    :param event_id: stored with the documents.
    :param race_start_time: a ``datetime.datetime``, or a ``datetime.time``
        that is combined with today's date.
    :param container: object providing ``place_limit_order``.
    :param armed: when false nothing is sent to the exchange.
    :param model_name: passed as ``strategy`` and stored with each order.
    :param reference_price_for_initial_bet: 0..3, as int or str. 0 keeps the
        price unchanged; 1..3 cap the price at the given level of the
        opposite side of the book.
    :return: True when ``armed``; otherwise None.
    """
    log.info("Placing bets: {}".format(bets))
    horses = len(selection_ids)
    back_prices, lay_prices, back1, lay1, back2, lay2, ltp_prices = prices
    bets = bets[0:horses * 2]  # cut off padded horses
    log.info("len bets: {}".format(len(bets)))
    log.info("len selection_ids: {}".format(len(selection_ids)))
    unit_stake = config.getfloat("Betting", "stake")
    m = MongoManager()

    d = {}
    d['bets'] = bets
    d['selection_ids'] = selection_ids
    d['timestamp'] = datetime.datetime.now()
    d['armed'] = armed
    d['back_prices'] = back_prices.tolist()
    d['lay_prices'] = lay_prices.tolist()
    d['market_id'] = market_ids
    d['event_id'] = event_id
    d['ltps'] = ltp_prices.tolist()
    d['stake'] = float(unit_stake)

    # convert to datetime for mongodb saving
    if isinstance(race_start_time, datetime.datetime):
        race_start_datetime = race_start_time
    else:
        race_start_datetime = datetime.datetime.combine(
            datetime.date.today(), race_start_time)
    d['race_start'] = race_start_datetime

    m.insert_document('place_bets', d, database='pnl')

    if not armed:
        # Dry run: the request is recorded but nothing is placed.
        return None

    # The reference level may arrive as int (the default) or as str.
    reference = str(reference_price_for_initial_bet)
    # level -> (cap ladder for LAY bets, cap ladder for BACK bets, log message)
    reference_levels = {
        '1': (back_prices, lay_prices, "Using level 1 as reference"),
        '2': (back1, lay1, "Using level2  as reference"),
        '3': (back2, lay2, "Using level3  as reference"),
    }

    for i, b in enumerate(bets):
        if not b:
            continue
        log.info("++++++++++++++++++++++++++++++++++++++++++++")
        horse = i // 2
        bet_type = 'BACK' if i % 2 == 0 else 'LAY'
        price = back_prices[horse] if bet_type == 'BACK' else lay_prices[horse]

        ask_price = price
        stake = unit_stake

        if bet_type == 'LAY':
            price = lay_saftey_check(selection_id=selection_ids[horse],
                                     last_price=ltp_prices[horse],
                                     current_lay_price=price,
                                     best_back=back_prices[horse])

        theoretical_ltp_price = price
        # additional reduction for early orders
        if reference == '0':
            log.info("Using ltp as reference")
        elif reference in reference_levels:
            lay_cap, back_cap, message = reference_levels[reference]
            cap = lay_cap[horse] if bet_type == 'LAY' else back_cap[horse]
            price = min(price, cap)
            log.info(message)
        else:
            log.error("Invalid value for reference_price_for_initial_bet")

        price = price_adjustment(price)

        log.info(
            "Placing order on betfair: marketid: {}, selection: {}, price: {}"
            .format(market_ids, selection_ids[horse], price))

        result = container.place_limit_order(market_ids,
                                             selection_ids[horse],
                                             stake,
                                             price,
                                             side=bet_type,
                                             strategy=model_name)

        try:
            m.insert_document(
                'orders', {
                    'market_id': market_ids,
                    'event_id': event_id,
                    # Store the converted datetime, as done for 'place_bets'.
                    'race_start': race_start_datetime,
                    'selection': selection_ids[horse],
                    'timestamp': datetime.datetime.now(),
                    'stake': float(stake),
                    'price': price,
                    'theoretical_ltp_price': theoretical_ltp_price,
                    'ltp': ltp_prices[horse],
                    'original_ask_price': ask_price,
                    'side': bet_type,
                    'result': result.status,
                    'error_code': result.error_code,
                    'size_matched':
                    result.instruction_reports[0].size_matched,
                    'average_price_matched':
                    result.instruction_reports[0].average_price_matched,
                    'betid': result.instruction_reports[0].bet_id,
                    'model_name': model_name
                },
                database='pnl')
        except Exception:
            # Bookkeeping is best effort; the order itself was already sent.
            log.warning(
                "inserting orders document failed, marketid: {}".format(
                    market_ids))

        log.info("Result: {}, {}, matched {} on avg of {}".format(
            result.status, result.error_code,
            result.instruction_reports[0].size_matched,
            result.instruction_reports[0].average_price_matched))
        try:
            if result.status == "SUCCESS":
                m.update_document('successful_bets',
                                  'market_id',
                                  market_ids,
                                  "successful_bets",
                                  selection_ids[horse],
                                  as_array=True)
        except Exception:
            log.warning(
                "Updating place_bets collection failed, marketid: {}".
                format(market_ids))

    return True
Exemplo n.º 28
0
def list_all_bets():
    """Render the order charts (wins, losses, prices) into ``../static``.

    All orders returned by ``MongoManager.get_all_orders`` are loaded into a
    dataframe (missing values become 0.0) and five PNG files are written next
    to this module: orders_wins_overall, orders_wins, orders_losses_overall,
    orders_losses and bets. Each figure is closed once it has been saved so
    that repeated calls do not accumulate open figures.

    Calls ``sys.exit()`` when there are no orders.
    """
    m = MongoManager()
    orders = m.get_all_orders()
    df = pd.DataFrame.from_dict(orders)
    df.fillna(value=0.0, inplace=True)
    log.info("Processing through orders: {}".format(len(df)))

    # Sanity Check
    if len(df) == 0:
        log.warning("No Pnl can be calculated as dataframe is empty")
        sys.exit()

    # Common variables
    fees = config.getfloat("Betting", "fees")
    x = np.arange(len(df))
    titles = [
        'bf_win', 'theoretical_win', 'bf_loss', 'theoretical_loss', 'price'
    ]
    xlabels = df['selection'].tolist()
    ylabels = [
        'Realized Profit', 'Theoretical Profit', 'Realized Loss',
        'Theoretical Loss', 'Price'
    ]
    colors = ['r', 'g', 'r', 'g', 'b']
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # Split profits into a non-negative and a non-positive part; fees are
    # only deducted from realized wins.
    df['bf_win'] = df['bf_profit'].clip(lower=0) * (1 - fees)
    df['bf_loss'] = df['bf_profit'].clip(upper=0)
    df['theoretical_win'] = df['theoretical_pnl'].clip(lower=0)
    df['theoretical_loss'] = df['theoretical_pnl'].clip(upper=0)

    # PnL Plots Borders
    win_borders = None
    loss_borders = None
    if not np.all(np.isnan(df['bf_profit'])) and not np.all(
            np.isnan(df['theoretical_pnl'])):
        win_borders = [
            0,
            int(
                np.nanmax([
                    np.nanmax(df['bf_win']),
                    np.nanmax(df['theoretical_win'])
                ]))
        ]
        loss_borders = [
            int(
                np.nanmin([
                    np.nanmin(df['bf_loss']),
                    np.nanmin(df['theoretical_loss'])
                ])), 0
        ]

    # Plot Actual/Theoretical Wins
    # Overall View
    f, ax = plt.subplots()
    w = 0.3
    ax.bar(x, df['bf_win'], width=w, color='r', align='center')
    ax.bar(x + w, df['theoretical_win'], width=w, color='g', align='center')
    ax.autoscale(tight=True)
    ax.set_title('Wins in the last 24h')
    ax.set_ylabel('Profits')
    ax.minorticks_on()
    ax.set_xticks([])
    plt.setp(ax.get_xticklabels(), visible=False)
    plt.legend(['Realized', 'Theoretical'])
    if win_borders:
        ax.set_ylim([win_borders[0], 1.05 * win_borders[1]])
    plt.savefig(os.path.join(dir_path, '../static/orders_wins_overall.png'),
                dpi=180)
    plt.close(f)

    # Individual View
    f, axs = plt.subplots(nrows=2)
    for i in range(0, 2):
        ax = axs[i]
        ax.bar(x, df[titles[i]], color=colors[i])
        ax.set_ylabel(ylabels[i])
        ax.minorticks_on()
        ax.set_xticks([])
        plt.setp(ax.get_xticklabels(), visible=False)
        if win_borders:
            ax.set_ylim([win_borders[0], 1.05 * win_borders[1]])
    axs[0].set_title('Wins in the last 24 hours')
    f.subplots_adjust(hspace=0)
    f.subplots_adjust(wspace=0)
    plt.savefig(os.path.join(dir_path, '../static/orders_wins.png'), dpi=180)
    plt.close(f)

    # Plot Actual/Theoretical Losses
    # Overall View
    f, ax = plt.subplots()
    w = 0.6
    ax.bar(x, df['bf_loss'], width=w, color='r', align='center')
    ax.bar(x + w, df['theoretical_loss'], width=w, color='g', align='center')
    ax.autoscale(tight=True)

    ax.set_title('Losses in the last 24h')
    ax.set_ylabel('Losses')
    ax.minorticks_on()
    ax.set_xticks([])
    plt.setp(ax.get_xticklabels(), visible=False)
    plt.legend(['Realized', 'Theoretical'])
    # Guard on the borders that are actually used for this plot.
    if loss_borders:
        ax.set_ylim([loss_borders[0] * 1.05, loss_borders[1]])
    plt.savefig(os.path.join(dir_path, '../static/orders_losses_overall.png'),
                dpi=180)
    plt.close(f)

    # Individual View
    f, axs = plt.subplots(nrows=2)
    for i in range(0, 2):
        ax = axs[i]
        ax.bar(x, df[titles[i + 2]], color=colors[i + 2])
        ax.set_ylabel(ylabels[i + 2])
        ax.minorticks_on()
        ax.set_xticks([])
        plt.setp(ax.get_xticklabels(), visible=False)
        if loss_borders:
            ax.set_ylim([loss_borders[0] * 1.05, loss_borders[1]])
    f.subplots_adjust(hspace=0)
    f.subplots_adjust(wspace=0)
    plt.savefig(os.path.join(dir_path, '../static/orders_losses.png'), dpi=180)
    plt.close(f)

    # Plot Bet Prices
    f, ax = plt.subplots()
    ax.bar(x, df[titles[-1]], color=colors[-1])
    ax.set_ylabel(ylabels[-1])
    ax.set_xlabel('Bets (Selection IDs)')
    ax.set_xticks(range(len(df)))
    ax.set_xticklabels(xlabels, rotation=90, fontsize=2)
    f.subplots_adjust(hspace=0)
    f.subplots_adjust(wspace=0)
    plt.savefig(os.path.join(dir_path, '../static/bets.png'), dpi=180)
    plt.close(f)
Exemplo n.º 29
0
def find_outliers():
    """Load the price_scrape documents of selection 7767293 into a dataframe.

    Nothing is returned; the frame only exists as a local for inspection.
    """
    manager = MongoManager()
    selection_filter = {'selection_id': 7767293}
    cursor = manager.mongodb.price_scrape.find(selection_filter)
    price_df = pd.DataFrame.from_records(cursor)
Exemplo n.º 30
0
def _save_pnl_chart(ax, path):
    """Save the figure that owns *ax* to *path* at 180 dpi, then close it."""
    # Function-scope import: only needed here to release the figure.
    import matplotlib.pyplot as plt

    fig = ax.get_figure()
    fig.savefig(path, dpi=180)
    # Figures created through pandas plotting stay registered with pyplot
    # until closed; closing avoids piling them up across repeated calls.
    plt.close(fig)


def _touch_pnl_chart(path):
    """Create (or truncate) *path* as an empty placeholder file."""
    with open(path, 'w'):
        pass


def pnl_charts(start=0, end=0):
    """Render the PnL charts as PNG files in ``../static``.

    Paths are resolved relative to the directory of this module.  Three
    files are produced:

    * ``chart.png`` -- bar chart of the rows returned by
      ``MongoManager.get_all_pnl()`` (titled "Today's bets").
    * ``chart_cumulative.png`` -- line chart of the cumulative sum of the
      same rows.
    * ``chart_cumulative_year.png`` -- line chart of the cumulative sum of
      the rows returned by ``get_all_pnl(from_date)``, where ``from_date``
      is the ``PNL``/``from_date`` config value (``%Y-%m-%d``).  Its title
      shows the p-value of ``ttest_ind`` of ``bf_profit`` against zeros and
      the current balance (available-to-bet balance minus exposure).

    For the first query, rows with a positive ``bf_profit`` are scaled by
    ``1 - fees`` (``Betting``/``fees`` config value) before plotting.  When
    a query returns no rows, empty placeholder files are written instead of
    the corresponding charts.

    Args:
        start: Unused; kept so existing callers keep working.
        end: Unused; kept so existing callers keep working.
    """
    log.info("Create charts")
    static_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              '../static')
    chart_path = os.path.join(static_dir, 'chart.png')
    cumulative_path = os.path.join(static_dir, 'chart_cumulative.png')
    year_path = os.path.join(static_dir, 'chart_cumulative_year.png')

    m = MongoManager()
    fees = config.getfloat("Betting", "fees")

    # --- bar chart and cumulative chart for the default PnL query ---
    df = pd.DataFrame(m.get_all_pnl())

    if not df.empty:
        # Apply fees to winning rows only.  This must happen after the
        # emptiness check: an empty frame has no 'bf_profit' column.
        winning = df['bf_profit'] > 0
        df.loc[winning] = df.loc[winning] * (1 - fees)

        ax = df.plot.bar()
        ax.set_title("Today's bets")
        ax.set_ylabel('Return')
        ax.set_xlabel('Races')
        ax.axhline(y=0, color='r', linestyle='-')
        _save_pnl_chart(ax, chart_path)

        ax = df.cumsum().plot()
        ax.set_title('Horse racing return last 24h')
        ax.set_ylabel('Return')
        ax.set_xlabel('Races')
        ax.axhline(y=0, color='r', linestyle='-')
        _save_pnl_chart(ax, cumulative_path)
    else:
        # Nothing to plot: leave empty files so the paths still exist.
        log.warning("No bets in the last 24 hours")
        _touch_pnl_chart(chart_path)
        _touch_pnl_chart(cumulative_path)

    # --- cumulative chart since the configured start date ---
    from_date = datetime.strptime(config.get("PNL", "from_date"), "%Y-%m-%d")
    df = pd.DataFrame(m.get_all_pnl(from_date))

    if df.empty:
        # Same fallback as above; avoids a KeyError on the missing column.
        log.warning("No bets since {}".format(from_date.date()))
        _touch_pnl_chart(year_path)
        return

    # Test whether the profits differ from an all-zero sample of equal size.
    profits = df['bf_profit'].fillna(0).values
    _, p_stat = ttest_ind(profits, np.zeros(len(profits)))

    balance_dict = get_account_balance()
    balance = (balance_dict['available_to_bet_balance']
               - balance_dict['exposure'])

    ax = df.cumsum().plot()
    ax.set_title(
        """Horse racing return last 365 days\np-stat: {:.2f} - {}\nCurrent Balance: {}"""
        .format(p_stat, p_stat < 0.05, balance))
    ax.set_ylabel('Return')
    ax.set_xlabel('Races')
    ax.axhline(y=0, color='r', linestyle='-')
    _save_pnl_chart(ax, year_path)