def load_from_db(self, name):
    """Restore this object's state from the stored document called *name*.

    Looks up one document matching ``{"name": name}`` through ``find_one``
    (arguments ``"prono"``, ``"learning_objects"``), opening the connection
    with ``get_db("prono", mongolab=True)`` first when ``self.client_db`` is
    ``None``.

    On success the attributes ``name``, ``params``, ``nn``, ``scalizer`` and
    ``normalizer`` are replaced; the last three are unpickled from the stored
    values.

    Every field is read and unpickled *before* any attribute is assigned, so
    a missing key or a failing unpickle no longer leaves the instance with a
    mix of old and new state.

    NOTE(review): if ``find_one`` returns ``None`` when nothing matches (as
    pymongo's own ``find_one`` does -- confirm for this wrapper), the
    subscripts below raise ``TypeError``.
    """
    if self.client_db is None:
        self.client_db = get_db("prono", mongolab=True)
    db_obj = find_one(
        {"name": name},
        "prono",
        "learning_objects",
        connection=self.client_db)

    # Stage everything in locals first (same read order as before) so that
    # an exception cannot leave the instance half-loaded.
    loaded_name = db_obj["name"]
    loaded_params = db_obj["params"]
    # SECURITY: pickle.loads executes arbitrary code embedded in the payload.
    # Only safe as long as the "learning_objects" documents are written by
    # trusted code.
    loaded_nn = pickle.loads(db_obj["nn"])
    loaded_scalizer = pickle.loads(db_obj["scalizer"])
    loaded_normalizer = pickle.loads(db_obj["normalizer"])

    self.name = loaded_name
    self.params = loaded_params
    self.nn = loaded_nn
    self.scalizer = loaded_scalizer
    self.normalizer = loaded_normalizer
def save_to_db(self, name):
    """Store this object's state as a new document called *name*.

    The document holds ``name``, ``self.params`` as-is, and pickled copies of
    ``self.nn``, ``self.scalizer`` and ``self.normalizer``. It is handed to
    ``insert`` (arguments ``"prono"``, ``"learning_objects"``) and whatever
    ``insert`` returns is returned unchanged. The connection is opened with
    ``get_db("prono", mongolab=True)`` when ``self.client_db`` is ``None``.

    NOTE(review): this always inserts; presumably saving twice under the same
    name creates two documents -- confirm against ``insert``.
    """
    if self.client_db is None:
        self.client_db = get_db("prono", mongolab=True)

    document = {"name": name, "params": self.params}
    # These attributes are arbitrary Python objects, hence stored pickled.
    for attribute in ("nn", "scalizer", "normalizer"):
        document[attribute] = pickle.dumps(getattr(self, attribute))

    return insert(
        document, "prono", "learning_objects", connection=self.client_db)
def load_game_id_features(game_id, features, mongolab=False):
    """Build the feature row of one stored game.

    Args:
        game_id: identifier of the game; wrapped in ``ObjectId`` for lookup.
        features: iterable of feature names; each must be a key of the
            builder table below (an unknown name raises ``KeyError``).
        mongolab: forwarded to ``get_db``.

    Returns:
        A flat list made of every builder's output, concatenated in the order
        of *features*, or ``None`` as soon as one builder returns ``None``.

    Side effect: prints the game's date.
    """
    client = get_db("prono", mongolab=mongolab)
    game = find_game({"$query": {"_id": ObjectId(game_id)}}, connection=client)

    # Feature name -> builder; every builder is called as
    # builder(game, earlier_games).
    constructor_funct = {
        "last_seven_results": get_last_seven_results,
        "ranking": get_rankings,
        "last_seven_results_home_pov": get_last_seven_results_home_pov,
        "day": get_day,
        "last_seven_home_or_away": get_last_games_home_or_away,
        "last_seven_goals": last_seven_goals,
        "last_seven_shots": get_last_games_shots,
        "last_seven_goals_per_shots": get_last_games_goals_per_shots,
        "last_seven_rankings": get_last_seven_ranking,
        "last_seven_opponents_rankings": get_last_seven_opponent_ranking,
        "points": get_points
    }
    line = []

    # Games of the same division and season dated strictly before this one.
    earlier_games_query = {
        "$query": {
            "division": ObjectId(game["division"]),
            "season": [str(game["season"][0]), str(game["season"][1])],
            "date": {"$lt": game["date"]}
        }
    }
    cursor = find_games(earlier_games_query, connection=client)
    print(game["date"])
    last_games = list(cursor)

    # A game without a final score gets a placeholder 2-2 result.
    for earlier_game in last_games:
        if "final" not in earlier_game["score"]:
            earlier_game["score"] = {"final": {"home": "2", "away": "2"}}

    for feature in features:
        values = constructor_funct[feature](game, last_games)
        if values is None:
            return None
        line.extend(values)
    return line
from betlib.mongo import get_db
from betlib.models.game import find_games, update_game
import os
import json

# Database handle used by this script (get_db called without the mongolab
# flag).
clientMDB = get_db("prono")
# clientMDB = get_mongolab_db("prono")

#games_brasil_serie_a = [g for g in find_games({"link": {"$regex": "/brazil/serie-a"}})]
#games_brasil_serie_a_links = list(set([g["link"] for g in games_brasil_serie_a]))
#print len(games_brasil_serie_a)
#print len(games_brasil_serie_a_links)

# Directory holding the crawled game files.
json_path = "../data/crawl_soccerway_FR/games/"

for json_filename in os.listdir(json_path):
    # Only files whose name contains both "updated" and "2015" are processed.
    if not ("updated" in json_filename and "2015" in json_filename):
        continue

    with open(json_path + json_filename) as json_file:
        print(json_filename)
        games = json.load(json_file)

    for game in games:
        # Keep Eredivisie games only.
        # if "game_link" in game and "/brazil/serie-a" in game["game_link"]:
        is_eredivisie = ("game_link" in game
                         and "/netherlands/eredivisie/" in game["game_link"])
        if is_eredivisie:
            # "years" is either None, a single value, or "<year>/<year>";
            # only the slash-separated form is split into a list.
            years = game["years"]
            if years is not None and "/" in years:
                season = years.split("/")
            else:
                season = years
#!/usr/bin/env python # -*- coding: utf-8 -*- from betlib.models.team import add_team_alias from betlib.mongo import get_db client = get_db("prono", mongolab=True) aliases = { "Manchester City": "Man City", "West Ham United": "West Ham", "Blackburn Rovers": "Blackburn", "Tottenham Hotspur": "Tottenham", "Wigan Athletic": "Wigan", "Birmingham City": "Birmingham", "Newcastle United": "Newcastle", "Stoke City": "Stoke", "West Bromwich Albion": "West Brom", "Manchester United": "Man United", "Bolton Wanderers": "Bolton", "Wolverhampton Wanderers": "Wolves", "Queens Park Rangers": "QPR", "Cardiff City": "Cardiff", "Hull City": "Hull", "Swansea City": "Swansea", "Norwich City": "Norwich", "Leicester City": "Leicester", "PSG": "Paris SG", "PSG": "Paris Saint Germain", "Olympique Lyonnais": "Lyon", "Olympique Marseille": "Marseille",
import os import json from betlib.models.game import add_game from betlib.mongo import get_db count = 0 path = os.sys.argv[1] #for i, filename in enumerate(os.listdir(path)): # db_connection = get_db("prono") db_connection = get_db("prono") for i, filename in enumerate( ["calendar_2014_updated.json", "calendar_2015_updated.json"]): calendar = dict() with open(path + filename, "r") as f: calendar = json.load(f) f.close() for j, game_date in enumerate(calendar): for competition in calendar[game_date]: for g in calendar[game_date][competition]: if g["team_H"]["name"] is not None and g["team_A"][ "name"] is not None: if "/england/premier-league/" in g["game_link"]: # if "/brazil/serie-a/" in g["game_link"]: count += 1 print count ret = add_game( { "team_H": g["team_H"],
def buildDataset(srcs, features, with_odds, mongolab=False):
    """Assemble feature rows, one-hot labels and odds for a set of games.

    Args:
        srcs: iterable of ``(competition, begin, end)`` sequences.
            ``competition`` is wrapped in ``ObjectId`` and matched against
            the ``division`` field. The first four characters of ``begin``
            and ``end`` are read as years; one query is issued per season
            ``[year, year + 1]`` for ``year`` in ``range(begin_year,
            end_year)`` (end year excluded). When ``begin`` and ``end`` are
            both exactly four characters long whole seasons are used;
            otherwise the two strings additionally bound the ``date`` field
            (``begin <= date < end``).
        features: forwarded to ``construct_row``.
        with_odds: when exactly ``True``, the bet365 home/draw/away odds of
            each game are read and converted to ``float``; otherwise ``None``
            is recorded for each game.
        mongolab: forwarded to ``get_db``.

    Returns:
        ``(data_X_all, data_Y_all, odds_all)``: three parallel lists with one
        entry per game for which ``construct_row`` returned a row. The last
        element of that row is the label (-1, 0 or 1), stored one-hot in
        ``data_Y_all``; the remaining elements go to ``data_X_all``.
    """
    client = get_db("prono", mongolab=mongolab)
    # Label -> one-hot encoding.
    game_issues = {-1: [0, 0, 1], 0: [0, 1, 0], 1: [1, 0, 0]}
    data_X_all = []
    data_Y_all = []
    odds_all = []

    for competition_range in srcs:
        competition = competition_range[0]
        begin = competition_range[1]
        end = competition_range[2]
        whole_seasons = len(begin) == 4 and len(end) == 4

        for season in range(int(begin[0:4]), int(end[0:4])):
            criteria = {
                "division": ObjectId(competition),
                "season": [str(season), str(season + 1)]
            }
            if not whole_seasons:
                # The date bounds are selection criteria and therefore belong
                # inside "$query". They used to sit next to "$query" at the
                # top level of the spec, where they are not part of the
                # filter.
                criteria["date"] = {"$gte": begin, "$lt": end}

            games = list(
                find_games(
                    {"$query": criteria, "$orderby": {"date": 1}},
                    connection=client))

            for current_game in games:
                # Only games dated strictly earlier feed the features.
                sub_games = [
                    g for g in games if g["date"] < current_game["date"]
                ]
                new_row = construct_row(current_game, sub_games, features)
                if new_row is None:
                    continue
                data_X_all.append(new_row[0:-1])
                data_Y_all.append(game_issues[new_row[-1]])
                if with_odds is True:
                    bet365 = current_game["bets"]["bet365"]
                    odds_all.append([
                        float(bet365["home"]),
                        float(bet365["draw"]),
                        float(bet365["away"])
                    ])
                else:
                    odds_all.append(None)

    return data_X_all, data_Y_all, odds_all
def _parse_soccerway_scores(raw_scores):
    """Turn the scraped ``scores`` mapping into the value stored as ``score``.

    Returns ``None`` when *raw_scores* is ``None`` or has no ``"final"``
    entry. Otherwise returns a dict with a ``"final"`` entry and, when the
    page provided one, a ``"half_time"`` entry; each is a
    ``{"home": ..., "away": ...}`` dict obtained by splitting the scraped
    text on ``" - "``.
    """
    if raw_scores is None:
        return None
    scores = {}
    if "half-time" in raw_scores:
        half_time = raw_scores["half-time"].split(" - ")
        scores["half_time"] = {"home": half_time[0], "away": half_time[1]}
    if "final" not in raw_scores:
        return None
    final = raw_scores["final"].split(" - ")
    scores["final"] = {"home": final[0], "away": final[1]}
    return scores


def update_game_in_db_from_soccerway():
    """Refresh stored games dated within five days of now from soccerway.

    Selects the games whose ``date`` (a ``YYYY_MM_DD`` string) lies between
    five days ago and five days ahead, inclusive. For every such game that
    has a ``link``, the page is fetched with ``game_page.get_page`` (relative
    links are prefixed with ``http://fr.soccerway.com``) and, when a page is
    returned, the game is updated through ``update_game``.

    The score is computed afresh for every game. Previously a page whose
    ``scores`` entry was ``None`` left the ``scores`` variable untouched,
    which raised ``NameError`` on the first game or wrote the previous game's
    score onto the current one.

    Progress is reported with ``print``; nothing is returned.
    """
    client = get_db("prono", mongolab=True)

    # timedelta's first positional argument is a number of days.
    now = datetime.now()
    window_end = now + timedelta(5)
    window_start = now - timedelta(5)
    latest_date = "%d_%02d_%02d" % (window_end.year, window_end.month,
                                    window_end.day)
    earliest_date = "%d_%02d_%02d" % (window_start.year, window_start.month,
                                      window_start.day)
    print(earliest_date)
    print(latest_date)

    games = list(
        find_games(
            {"date": {"$lte": latest_date, "$gte": earliest_date}},
            connection=client))
    # games = [g for g in find_games({}, connection=client)] # all games
    print(len(games))

    for game in games:
        if "link" not in game:
            continue

        link = game["link"]
        if link.startswith("http"):
            updated = game_page.get_page(link)
        else:
            updated = game_page.get_page("http://fr.soccerway.com" + link)
        if updated is None:
            continue

        scores = _parse_soccerway_scores(updated["scores"])

        # "years" arrives as "<year>/<year>" and is stored as a list.
        if updated["years"] is not None:
            updated["years"] = updated["years"].split("/")

        ret = update_game(
            {"link": link},
            {
                "score": scores,
                "stadium": updated["stadium"],
                "region": updated["country"],
                "start_time": updated["start_time"],
                "season": updated["years"],
                "date": updated.get("date", None),
                "possession": updated.get("possession", None),
                "corners": updated.get("corners", None),
                # The scraped page uses French keys for the statistics.
                "shots": updated.get("tirs", None),
                "shots_on_target": updated.get("tirs_cadres", None),
                "fouls": updated.get("fautes", None)
            },
            verbose=True,
            connection=client)

        if ret is None:
            print(" ".join((link, "is up to date in db.")))
        else:
            print(" ".join(("[success]", str(game["_id"]))))