def create_or_update_divisions():
    """Seed or update the football division/competition reference rows.

    NOTE(review): this function was originally (mis)named
    ``create_or_update_market_type``, which collided with — and was shadowed
    by — the later function of that name that actually seeds market types,
    leaving this definition unreachable. Renamed to match what it does.
    """
    # (division_code, human-readable division name) pairs to upsert.
    divisions = [
        (DivisionCodeEnum.PREMIER_LEAGUE, 'premier league'),
        (DivisionCodeEnum.CHAMPIONSHIP, 'championship'),
        (DivisionCodeEnum.LEAGUE_1, 'league 1'),
        (DivisionCodeEnum.LEAGUE_2, 'league 2'),
        (DivisionCodeEnum.CONFERENCE, 'conference'),
        (DivisionCodeEnum.FRANCE_LEAGUE_1, 'france league 1'),
        (DivisionCodeEnum.GERMANY_BUNDESLIGA, 'german bundesliga'),
        (DivisionCodeEnum.GERMANY_BUNDESLIGA_2, 'germany bundesliga 2'),
        (DivisionCodeEnum.ITALY_SERIE_A, 'italy serie a'),
        # Fixed typo: was 'italy series b'.
        (DivisionCodeEnum.ITALY_SERIE_B, 'italy serie b'),
        (DivisionCodeEnum.SPAIN_LA_LIGA, 'spain la liga'),
        (DivisionCodeEnum.SPAIN_SEGUNDA, 'spain segunda'),
        (DivisionCodeEnum.UEFA_CHAMPIONS_LEAGUE, 'uefa champions league'),
        (DivisionCodeEnum.UEFA_EUROPA_LEAGUE, 'uefa europa league'),
    ]
    with dbm.get_managed_session() as session:
        for division_code, division_name in divisions:
            Division.create_or_update(session,
                                      division_code=division_code,
                                      division_name=division_name)
def create_or_update_bookies():
    """Seed or update the bookmaker reference rows.

    Fixes a copy-paste defect in the original: every bookie was created with
    ``bookie_name='marathon bet'``. Display names below are inferred from the
    enum member names — confirm against the real bookmaker branding.
    """
    # (bookie_code, human-readable bookie name) pairs to upsert.
    bookies = [
        (BookieNameEnum.MARATHON_BET, 'marathon bet'),
        (BookieNameEnum.PADDY_POWER, 'paddy power'),
        (BookieNameEnum.BET_VICTOR, 'bet victor'),
        (BookieNameEnum.XBET_1, '1xbet'),
        (BookieNameEnum.UNIBET, 'unibet'),
        (BookieNameEnum.SPORT_888, '888 sport'),
        (BookieNameEnum.BETWAY, 'betway'),
        (BookieNameEnum.SKY_BET, 'sky bet'),
        (BookieNameEnum.BETCLIC, 'betclic'),
        (BookieNameEnum.LADBROKES, 'ladbrokes'),
        (BookieNameEnum.WILLIAM_HILL, 'william hill'),
    ]
    with dbm.get_managed_session() as session:
        for bookie_code, bookie_name in bookies:
            Bookie.create_or_update(session,
                                    bookie_code=bookie_code,
                                    bookie_name=bookie_name)
def convert_published_datetime_to_game_time(df):
    """Convert per-series odds timestamps into minutes-from-kickoff.

    For each ``series_uid`` in *df*: resample the rows to 1-minute frequency,
    forward-fill gaps, and replace ``published_datetime`` with ``game_time``
    (minutes elapsed since the event's in-play start). Minutes before the
    first observation are padded with the first observed ``ltp``.

    :param df: DataFrame with at least 'series_uid', 'published_datetime'
        and 'ltp' columns.
    :return: new DataFrame with 'series_uid', 'game_time', 'ltp' columns.
    """
    with dbm.get_managed_session() as session:
        # Map each series to its event's in-play (kick-off) start time.
        in_play_map = {
            series_uid: ExchangeOddsSeries.get_by_uid(
                session, int(series_uid)).event.in_play_start
            for series_uid in df['series_uid'].unique()
        }
        # Seed with an empty frame so the output column order matches the
        # original implementation; collect parts and concat ONCE at the end
        # instead of growing the frame inside the loop (quadratic).
        parts = [pd.DataFrame(columns=['series_uid', 'game_time', 'ltp'])]
        for series_uid in df['series_uid'].unique():
            sub_df = df[df['series_uid'] == series_uid].copy(deep=True)
            sub_df = sub_df.set_index(
                pd.DatetimeIndex(sub_df['published_datetime']))
            sub_df = sub_df.drop(['published_datetime', 'series_uid'], axis=1)
            # 'min' replaces the deprecated 'T' alias; .ffill() replaces the
            # deprecated fillna(method='ffill').
            sub_df = sub_df.resample('min').asfreq().ffill()
            sub_df = sub_df.reset_index()
            sub_df['series_uid'] = series_uid
            sub_df['in_play_time'] = sub_df['series_uid'].map(in_play_map)
            # NOTE(review): timedelta.seconds wraps for spans >= 1 day and for
            # negative deltas — assumes odds never start a day before kickoff.
            sub_df['game_time'] = (
                sub_df['published_datetime'] -
                sub_df['in_play_time']).apply(lambda x: x.seconds / 60)
            sub_df = sub_df.drop(['in_play_time', 'published_datetime'],
                                 axis=1)
            # Pad minutes 0..first-observed-minute with the first ltp so every
            # series starts at game_time 0.
            if sub_df.iloc[0]['game_time'] != 0:
                initial_times = range(0, int(sub_df.iloc[0]['game_time']))
                rows_to_insert = {
                    'series_uid': [series_uid] * len(initial_times),
                    'ltp': [sub_df.iloc[0]['ltp']] * len(initial_times),
                    'game_time': list(initial_times),
                }
                sub_df = pd.concat([pd.DataFrame(rows_to_insert), sub_df])
            parts.append(sub_df)
        df = pd.concat(parts, ignore_index=True)
    return df
def create_or_update_runners():
    """Upsert every runner code along with its mapped betfair code."""
    with dbm.get_managed_session() as session:
        for code in RunnerCodeEnum.to_dict().values():
            # Betfair code may be absent for some runners (.get -> None).
            betfair_code = inverse_runner_betfair_map.get(code)
            Runner.create_or_update(session,
                                    runner_code=code,
                                    runner_betfair_code=betfair_code)
def create_or_update_sport():
    """Upsert the sport reference rows (football, horse racing)."""
    sports = [
        (SportCodeEnum.FOOTBALL, 'football'),
        (SportCodeEnum.HORSE_RACING, 'horse racing'),
    ]
    with dbm.get_managed_session() as session:
        for sport_code, sport_name in sports:
            Sport.create_or_update(session,
                                   sport_code=sport_code,
                                   sport_name=sport_name)
def create_or_update_info_source():
    """Upsert the historical-exchange info source, linked to Betfair."""
    with dbm.get_managed_session() as session:
        betfair_org = InfoSourceOrganisation.get_by_name(
            session, ISOEnum.BETFAIR)
        InfoSource.create_or_update(
            session,
            info_source_code=InfoSourceEnum.EXCHANGE_HISTORICAL,
            info_source_name='historical exchange data',
            info_source_organisation=betfair_org)
def create_or_update_countries():
    """Upsert countries from `mapping`, normalising each name to a
    lowercase, punctuation-free form."""
    # Build the translation table once rather than per country.
    strip_punctuation = str.maketrans('', '', string.punctuation)
    with dbm.get_managed_session() as session:
        for raw_name, country_code in mapping.items():
            country_name = raw_name.lower().translate(strip_punctuation)
            Country.create_or_update(session,
                                     country_name=country_name,
                                     country_code=country_code)
def add_event_uid_to_df(df):
    """Attach an 'event_uid' column to *df*, resolved from 'series_uid'.

    Mutates and returns *df*.
    """
    with dbm.get_managed_session() as session:
        uid_map = {}
        # One DB lookup per distinct series, not per row.
        for series_uid in df['series_uid'].unique():
            series = ExchangeOddsSeries.get_by_uid(session, int(series_uid))
            uid_map[series_uid] = series.event_uid
        df['event_uid'] = df['series_uid'].map(uid_map)
        return df
def create_or_update_item_freq_type():
    """Upsert the item frequency type reference rows (minute, second)."""
    freq_types = [
        (ItemFreqTypeCodeEnum.MINUTE, 'minute'),
        (ItemFreqTypeCodeEnum.SECOND, 'second'),
    ]
    with dbm.get_managed_session() as session:
        for code, desc in freq_types:
            ItemFreqType.create_or_update(session,
                                          item_freq_type_code=code,
                                          item_freq_type_desc=desc)
def _import_horse_racing_file(file_path):
    """Extract one historical horse racing odds file and persist it.

    Creates/updates the Event, MarketLocation, Market and ExchangeOddsSeries
    rows, then bulk-inserts the odds items. Each file runs in its own managed
    session so a failure rolls back only that file.

    :param file_path: path to one historical exchange data file.
    :return: number of series-item rows written.
    """
    with dbm.get_managed_session() as session:
        print(file_path)
        extractor = HorseRacingExchangeOddsExtractor(file_path)
        (event_data, market_data, df,
         total_pre_off_volume, total_volume) = extractor.extract_data()
        event, _ = Event.create_or_update(session,
                                          event_data['event_id'],
                                          team_a=None,
                                          team_b=None,
                                          sport_code=SCEnum.HORSE_RACING)
        # Derive a stable location code from the venue name,
        # e.g. 'Market Rasen' -> 'MARKET_RASEN'.
        market_location_name = market_data['venue'].lower()
        market_location_code = market_data['venue'].upper().replace(' ', '_')
        MarketLocation.create_or_update(
            session,
            market_location_code=market_location_code,
            market_location_name=market_location_name)
        market, existed = Market.create_or_update(
            session,
            market_betfair_id=str(market_data['market_id']),
            market_type_code=MTCEnum.WIN,
            event=event,
            pre_off_volume=total_pre_off_volume,
            total_volume=total_volume,
            off_time=event_data['off_time'],
            market_location_code=market_location_code)
        series, _ = ExchangeOddsSeries.create_or_update(
            session,
            event=event,
            market=market,
            item_freq_type_code=IFTCEnum.SECOND,
            info_source_code=ISEnum.EXCHANGE_HISTORICAL)
        df['series_uid'] = series.series_uid
        df.to_sql(name=tb.exchange_odds_series_item(),
                  schema='public',
                  con=session.connection(),
                  if_exists='append',
                  method='multi',
                  index=False)
        return len(df)


def import_horse_racing_data(
        root_dir="C:\\Users\\rober\\sport_data\\horse_racing\\ADVANCED\\2020\\",
        months=('Mar',)):
    """Walk historical horse racing data files and load them into the DB.

    Best-effort batch import: a failure in one file is logged and the walk
    continues with the next file.

    :param root_dir: base directory containing one sub-directory per month
        (defaults preserve the original hard-coded path).
    :param months: iterable of month sub-directory names to import.
    :return: total number of series-item rows written.
    """
    n_rows = 0
    for month in months:
        month_dir = os.path.join(root_dir, month)
        for subdir, dirs, files in os.walk(month_dir):
            for file in files:
                file_path = os.path.join(subdir, file)
                try:
                    added = _import_horse_racing_file(file_path)
                    n_rows += added
                    print(f"Added {added} to db for total of {n_rows}")
                except Exception as exp:
                    # Deliberately broad: skip the bad file, keep importing.
                    print(f"Failed to import {file_path}: {exp}")
    return n_rows
def create_or_update_market_type():
    """Seed or update the market type reference rows."""
    market_types = [
        (MarketTypeCodeEnum.CORRECT_SCORE, 'correct final match score'),
        # Was a copy-paste of the CORRECT_SCORE description in the original.
        (MarketTypeCodeEnum.OVER_UNDER_GOALS, 'over/under total goals'),
        (MarketTypeCodeEnum.MATCH_ODDS, 'match result odds'),
        (MarketTypeCodeEnum.WIN, 'horse racing win'),
    ]
    with dbm.get_managed_session() as session:
        for code, desc in market_types:
            MarketType.create_or_update(session,
                                        market_type_code=code,
                                        market_type_desc=desc)
'solihull': 'solihull moors', 'maidstone': 'maidstone utd', 'salford': 'salford city', 'sutton': 'sutton utd', 'milton keynes dons': 'mk dons', 'notts county': 'notts co', 'cambridge': 'cambridge utd', 'oxford': 'oxford utd', 'bristol rvs': 'bristol rovers', 'peterboro': 'peterborough', 'halifax': 'fc halifax town', 'man united': 'man utd'} root_dir = r'C:\Users\rober\sport_data\CSV\football_data_com' with dbm.get_managed_session() as session: for subdir, dirs, files in os.walk(root_dir): for file in files: print(file) raw_df = pd.read_csv(os.path.join(subdir, file), encoding="ISO-8859-1") df = parse_football_data_com(raw_df) for index, row in df.iterrows(): team_a_name = row['home_team'].lower().translate(str.maketrans('', '', string.punctuation)) team_b_name = row['away_team'].lower().translate(str.maketrans('', '', string.punctuation)) for original_str, replacement_str in str_replacements.items(): team_a_name = team_a_name.replace(original_str, replacement_str) team_b_name = team_b_name.replace(original_str, replacement_str) try: team_a = Team.get_by_team_name(session, team_a_name) team_b = Team.get_by_team_name(session, team_b_name) except:
def get_processed_horse_racing_odds(runner_codes=None,
                                    from_date=None,
                                    until_date=None,
                                    market_type_code=None,
                                    division_codes=None,
                                    item_freq_type_code=None,
                                    min_market_total_volume=None,
                                    min_market_pre_off_volume=None,
                                    max_mins_from_off_time=60,
                                    market_location_code=None):
    """Return dataframe of horse racing odds in a user friendly format
    i.e. with columns as individual horse odds ('<runner>_ltp') or traded
    volume ('<runner>_tv').

    :param runner_codes: runner codes to include; None means all runners in
        ``horse_racing_runner_map``.
    All remaining parameters are passed straight through to
    ``ExchangeOddsSeriesItem.get_series_items_df`` as filters.
    :return: DataFrame sorted by 'published_datetime', deduplicated on
        ('series_uid', 'published_datetime'), with all-NaN columns dropped.
    """
    with dbm.get_managed_session() as session:
        # runner code -> runner uid
        inverse_horse_racing_map = {
            v: k for k, v in horse_racing_runner_map.items()
        }
        if runner_codes is None:
            runner_uids = list(horse_racing_runner_map.keys())
        else:
            runner_uids = [
                inverse_horse_racing_map[code] for code in runner_codes
            ]
        df = ExchangeOddsSeriesItem.get_series_items_df(
            session,
            from_date=from_date,
            until_date=until_date,
            market_type_code=market_type_code,
            division_codes=division_codes,
            item_freq_type_code=item_freq_type_code,
            min_market_total_volume=min_market_total_volume,
            min_market_pre_off_volume=min_market_pre_off_volume,
            max_mins_from_off_time=max_mins_from_off_time,
            market_location_code=market_location_code)
        df['update_dict'] = df['update_json'].apply(lambda j: json.loads(j))
        df = df.drop('update_json', axis=1)
        df = df.sort_values('published_datetime', ascending=True)
        # Add ltp and tv columns
        df[list(runner_uids)] = np.nan
        for runner_uid in runner_uids:
            ltp_col_name = (horse_racing_runner_map[int(runner_uid)] +
                            '_ltp').lower()
            tv_col_name = (horse_racing_runner_map[int(runner_uid)] +
                           '_tv').lower()
            df[ltp_col_name] = df['update_dict'].apply(
                lambda d: d['ltp'].get(str(runner_uid)))
            df[tv_col_name] = df['update_dict'].apply(
                lambda d: d['tv'].get(str(runner_uid)))
            # Fill gaps within each series only (never across series);
            # .ffill()/.bfill() replace the deprecated fillna(method=...).
            for series_uid in df['series_uid'].unique():
                series_ix = df['series_uid'] == series_uid
                df.loc[series_ix, ltp_col_name] = (
                    df.loc[series_ix, ltp_col_name].ffill())
                df.loc[series_ix, ltp_col_name] = (
                    df.loc[series_ix, ltp_col_name].bfill())
                df.loc[series_ix, tv_col_name] = (
                    df.loc[series_ix, tv_col_name].ffill())
                df.loc[series_ix, tv_col_name] = (
                    df.loc[series_ix, tv_col_name].bfill())
            # drop if column is all nan
            if df[ltp_col_name].isna().all():
                df = df.drop([ltp_col_name, tv_col_name], axis=1)
            else:
                # convert to float
                # NOTE(review): truthiness test maps 0 to None as well as
                # None/'' — preserved from the original; confirm intended.
                df[ltp_col_name] = df[ltp_col_name].apply(
                    lambda x: float(x) if x else None)
                df[tv_col_name] = df[tv_col_name].apply(
                    lambda x: float(x) if x else None)
        # Drop unnecessary columns
        df = df.drop('update_dict', axis=1)
        df = df.drop(list(runner_uids), axis=1)
        df = df.dropna(axis=1, how='all')
        df = df.drop_duplicates(['series_uid', 'published_datetime'])
        return df
def create_or_update_info_source_organisation():
    """Upsert the Betfair info-source organisation row."""
    with dbm.get_managed_session() as session:
        InfoSourceOrganisation.create_or_update(
            session,
            organisation_name=InfoSourceOrganisationEnum.BETFAIR,
            organisation_url='www.betfair.co.uk')