def parse_competitions2(competitions, sport="football", *sites):
    """
    Parse several competitions on several bookmakers in parallel and store
    the merged odds in ``sportsbetting.ODDS[sport]``.

    Args:
        competitions: iterable of competition names. A name equal to
            ``sport`` or containing "Tout le" triggers a team import for the
            whole sport instead of a single competition.
        sport: sport key, also used as the key written in
            ``sportsbetting.ODDS``.
        *sites: bookmaker names to parse. When omitted, the built-in list of
            twelve bookmakers is used.

    Side effects:
        Writes ``EXPECTED_TIME``, ``SELENIUM_REQUIRED``, ``PROGRESS``,
        ``SUB_PROGRESS_LIMIT``, ``IS_PARSING`` and ``ODDS[sport]`` on the
        ``sportsbetting`` module. ``IS_PARSING`` is always reset to False and
        the drivers started here are always quit, even when parsing raises.
    """
    if not sites:
        sites = ['betclic', 'betstars', 'bwin', 'france_pari', 'joa', 'netbet',
                 'parionssport', 'pasinobet', 'pmu', 'unibet', 'winamax', 'zebet']
    sportsbetting.EXPECTED_TIME = 28 + len(competitions) * 12.5
    selenium_sites = {"betstars", "bwin", "joa", "parionssport", "pasinobet", "unibet"}
    requested_selenium_sites = selenium_sites.intersection(sites)

    # The caller's name must be read from this very frame: moving this lookup
    # into a helper or a comprehension would inspect the wrong frame.
    caller_name = inspect.currentframe().f_back.f_code.co_name
    called_from_entry_point = (caller_name in ["<module>", "parse_thread"]
                               or 'test' in caller_name)
    # `not sites` is always False here (the default list was substituted
    # above); it is kept so the value stored in SELENIUM_REQUIRED is unchanged.
    selenium_required = (called_from_entry_point
                         and (requested_selenium_sites or not sites))
    sportsbetting.SELENIUM_REQUIRED = selenium_required

    if selenium_required:
        # The context manager releases the worker threads once map() is done.
        with ThreadPool(6) as pool:
            pool.map(lambda x: selenium_init.start_selenium(x),
                     requested_selenium_sites)
    try:
        sportsbetting.PROGRESS = 0
        sportsbetting.SUB_PROGRESS_LIMIT = len(sites)
        for competition in competitions:
            if competition == sport or "Tout le" in competition:
                import_teams_by_sport(sport)
            else:
                id_competition = get_id_formatted_competition_name(competition, sport)[0]
                if id_competition < 0:
                    import_teams_by_competition_id_thesportsdb(id_competition)
                else:
                    import_teams_by_url("http://www.comparateur-de-cotes.fr/comparateur/"
                                        + sport + "/a-ed" + str(id_competition))
        sportsbetting.IS_PARSING = True
        with ThreadPool(6) as pool:
            list_odds = pool.map(
                lambda x: parse_competitions_site(competitions, sport, x), sites)
    finally:
        # Run the cleanup on failure too, so that the parsing flag is not left
        # set and the browsers started above are not left running.
        sportsbetting.IS_PARSING = False
        if selenium_required:
            with ThreadPool(6) as pool:
                pool.map(lambda x: selenium_init.DRIVER[x].quit(),
                         requested_selenium_sites)
    sportsbetting.ODDS[sport] = merge_dict_odds(list_odds)
def parse_competition(competition, sport="football", *sites):
    """
    Return the odds of a given competition for one or several betting sites.

    If no site is chosen, parsing covers the whole built-in bookmaker list
    (described by the original docstring as the bookmakers recognised by the
    ARJEL).

    Raises:
        sportsbetting.AbortException: when ``sportsbetting.ABORT`` is set.

    Returns:
        The validated, merged odds, or None when the competition lookup
        raises TypeError (reported as an unknown competition) or when the
        direct caller is module-level code.
    """
    if sportsbetting.ABORT:
        raise sportsbetting.AbortException

    try:
        competition_id, formatted_name = get_id_formatted_competition_name(
            competition, sport)
    except TypeError:
        print("Competition inconnue")
        return None

    # Printed before the default list is substituted, so only the sites that
    # were explicitly requested are echoed.
    print(formatted_name, *sites)
    if not sites:
        sites = [
            'betclic', 'betstars', 'bwin', 'france_pari', 'joa', 'netbet',
            'parionssport', 'pasinobet', 'pmu', 'unibet', 'winamax', 'zebet'
        ]

    several_sites = len(sites) > 1
    odds_by_site = {}
    for site in sites:
        if several_sites:
            print(site)
        url = get_competition_by_id(competition_id, site)
        if not url:
            continue
        try:
            # Inner handlers retry the parsing once; a failure of the retry
            # is reported by the outer handlers below.
            try:
                odds_by_site[site] = parse(site, url)
            except urllib3.exceptions.MaxRetryError:
                selenium_init.DRIVER[site].quit()
                print("Redémarrage de selenium")
                selenium_init.start_selenium(site, timeout=20)
                odds_by_site[site] = parse(site, url)
            except sqlite3.OperationalError:
                print("Erreur dans la base de données, redémarrage en cours")
                odds_by_site[site] = parse(site, url)
        except urllib.error.URLError:
            print("{} non accessible sur {} (délai écoulé)".format(competition, site))
        except KeyboardInterrupt:
            # An interrupt only discards the current site's odds.
            odds_by_site[site] = {}
        except selenium.common.exceptions.TimeoutException:
            print("Element non trouvé par selenium ({} sur {})".format(competition, site))
        except sportsbetting.UnavailableCompetitionException:
            print("{} non disponible sur {}".format(competition, site))
        except socket.timeout:
            print("{} non accessible sur {} (timeout socket)".format(competition, site))
        except selenium.common.exceptions.StaleElementReferenceException:
            print("StaleElement non trouvé par selenium ({} sur {})".format(competition, site))
        except selenium.common.exceptions.WebDriverException:
            print("Connection closed ({} sur {})".format(competition, site))

    named_odds = format_team_names(odds_by_site, sport, competition)
    checked_odds = valid_odds(merge_dict_odds(named_odds), sport)
    # The caller lookup has to stay in this frame to inspect the real caller.
    called_from_module = inspect.currentframe().f_back.f_code.co_name == "<module>"
    return None if called_from_module else checked_odds
def parse_competitions(competitions, sport="football", *sites):
    """
    Parse several competitions on several bookmakers in parallel and store
    the merged odds in ``sportsbetting.ODDS[sport]``.

    Args:
        competitions: iterable of competition names. A name equal to
            ``sport`` or containing "Tout le" triggers a team import for the
            whole sport instead of a single competition.
        sport: sport key, also used as the key written in
            ``sportsbetting.ODDS``.
        *sites: bookmaker names to parse. When omitted, every site of the
            built-in ordering is used. Names absent from that ordering are
            dropped.

    Side effects:
        Writes ``EXPECTED_TIME``, ``SELENIUM_REQUIRED``, ``PROGRESS``,
        ``SUB_PROGRESS_LIMIT``, ``IS_PARSING``, ``ODDS[sport]`` and ``ABORT``
        on the ``sportsbetting`` module. Exceptions derived from ``Exception``
        raised while parsing are printed to stderr and not propagated.
    """
    sites_order = [
        'bwin', 'parionssport', 'betstars', 'pasinobet', 'joa', 'unibet',
        'betclic', 'pmu', 'france_pari', 'netbet', 'winamax', 'zebet'
    ]
    if not sites:
        sites = sites_order
    sportsbetting.EXPECTED_TIME = 28 + len(competitions) * 12.5
    selenium_sites = sportsbetting.SELENIUM_SITES.intersection(sites)

    # The caller's name must be read from this very frame: moving this lookup
    # into a helper or a comprehension would inspect the wrong frame.
    caller_name = inspect.currentframe().f_back.f_code.co_name
    called_from_entry_point = (caller_name in ["<module>", "parse_thread"]
                               or 'test' in caller_name)
    # `not sites` is always False here (the default was substituted above);
    # it is kept so the value stored in SELENIUM_REQUIRED is unchanged.
    selenium_required = called_from_entry_point and (selenium_sites or not sites)
    sportsbetting.SELENIUM_REQUIRED = selenium_required

    # Keep only known sites, in the fixed order above.
    sites = [site for site in sites_order if site in sites]

    sportsbetting.PROGRESS = 0
    if selenium_required:
        for site in selenium_sites:
            # Every site runs headless except bwin when parsing handball.
            headless = sport != "handball" or site != "bwin"
            # Retry until the driver starts or an abort is requested.
            while not (sportsbetting.ABORT
                       or selenium_init.start_selenium(site, headless, timeout=15)):
                colorama.init()
                print(termcolor.colored('Restarting', 'yellow'))
                colorama.deinit()
            sportsbetting.PROGRESS += 100 / len(selenium_sites)

    sportsbetting.PROGRESS = 0
    sportsbetting.SUB_PROGRESS_LIMIT = len(sites)
    for competition in competitions:
        if competition == sport or "Tout le" in competition:
            import_teams_by_sport(sport)
        else:
            id_competition = get_id_formatted_competition_name(competition, sport)[0]
            if id_competition < 0:
                import_teams_by_competition_id_thesportsdb(id_competition)
            else:
                import_teams_by_url("http://www.comparateur-de-cotes.fr/comparateur/"
                                    + sport + "/a-ed" + str(id_competition))

    try:
        sportsbetting.IS_PARSING = True
        # The context manager releases the worker threads once map() is done.
        with ThreadPool(7) as pool:
            list_odds = pool.map(
                lambda x: parse_competitions_site(competitions, sport, x), sites)
        sportsbetting.ODDS[sport] = merge_dict_odds(list_odds)
    except Exception:
        print(traceback.format_exc(), file=sys.stderr)
    finally:
        # Also reset the flag when a non-Exception error propagates.
        sportsbetting.IS_PARSING = False

    if selenium_required:
        # Only the message is printed here; this function does not call
        # quit() on any driver itself.
        # NOTE(review): presumably the drivers are closed elsewhere - confirm.
        colorama.init()
        print(termcolor.colored('Drivers closed', 'green'))
        colorama.deinit()
    sportsbetting.ABORT = False
def add_names_to_db(competition, sport="football", *sites):
    """
    Add to the database the team/player names of a given competition, as
    found on every requested site.

    Args:
        competition: competition name. A name equal to ``sport`` or
            containing "Tout le" triggers a team import for the whole sport.
        sport: sport key used for the lookups.
        *sites: bookmaker names to parse. When omitted, the built-in list of
            twelve bookmakers is used.

    Returns:
        ``{}`` when the competition lookup raises TypeError (reported as an
        unknown competition), otherwise None. Any truthy value returned by
        ``parse_and_add_to_db`` is appended to
        ``sportsbetting.TEAMS_NOT_FOUND``.

    Selenium drivers are started and quit here only when the direct caller
    is module-level code; they are quit even if the parsing loop raises.
    """
    try:
        id_competition, formatted_name = get_id_formatted_competition_name(
            competition, sport)
    except TypeError:
        print("Competition inconnue")
        return {}
    print(formatted_name)
    if competition == sport or "Tout le" in competition:
        import_teams_by_sport(sport)
    else:
        import_teams_by_url("http://www.comparateur-de-cotes.fr/comparateur/"
                            + sport + "/a-ed" + str(id_competition))
    if not sites:
        sites = [
            'betclic', 'betstars', 'bwin', 'france_pari', 'joa', 'netbet',
            'parionssport', 'pasinobet', 'pmu', 'unibet', 'winamax', 'zebet'
        ]
    selenium_sites = {
        "betstars", "bwin", "joa", "parionssport", "pasinobet", "unibet"
    }
    requested_selenium_sites = selenium_sites.intersection(sites)
    # The caller's name must be read from this very frame: moving this lookup
    # into a helper or a comprehension would inspect the wrong frame.
    called_from_module = inspect.currentframe().f_back.f_code.co_name == "<module>"
    selenium_required = (called_from_module
                         and (requested_selenium_sites or not sites))
    if selenium_required:
        for site in requested_selenium_sites:
            selenium_init.start_selenium(site)
    try:
        for site in sites:
            print(site)
            url = get_competition_url(competition, sport, site)
            if not url:
                continue
            try:
                teams = parse_and_add_to_db(site, sport, url)
                if teams:
                    sportsbetting.TEAMS_NOT_FOUND.append(teams)
            except KeyboardInterrupt:
                print("Recommencez pour arrêter le parsing")
                time.sleep(1)
            except urllib3.exceptions.MaxRetryError:
                # Restart the driver of this site only, using the same
                # per-site calls as the start-up and shutdown code of this
                # function.
                # NOTE(review): assumes DRIVER holds an entry for `site` -
                # confirm for sites that do not use selenium.
                selenium_init.DRIVER[site].quit()
                print("Redémarrage de selenium")
                selenium_init.start_selenium(site)
                teams = parse_and_add_to_db(site, sport, url)
                if teams:
                    sportsbetting.TEAMS_NOT_FOUND.append(teams)
            except (selenium.common.exceptions.TimeoutException,
                    urllib.error.HTTPError):
                # Best effort: a site that times out or answers with an HTTP
                # error is skipped silently.
                continue
    finally:
        if selenium_required:
            for site in requested_selenium_sites:
                selenium_init.DRIVER[site].quit()
def parse_competition(competition, sport="football", *sites):
    """
    Return the odds of a given competition for one or several betting sites.

    If no site is chosen, parsing covers the whole built-in bookmaker list
    (described by the original docstring as the bookmakers recognised by the
    ARJEL).

    Args:
        competition: competition name.
        sport: sport key, also used as the key written in
            ``sportsbetting.ODDS``.
        *sites: bookmaker names to parse.

    Returns:
        The validated, merged odds when the direct caller is not module-level
        code. When called from module level the odds are stored in
        ``sportsbetting.ODDS[sport]`` and None is returned. None is also
        returned when the competition lookup raises TypeError (reported as
        an unknown competition).

    Selenium drivers are started and quit here only when the direct caller
    is module-level code; they are quit even if the parsing loop raises.
    """
    try:
        _id, formatted_name = get_id_formatted_competition_name(competition, sport)
    except TypeError:
        print("Competition inconnue")
        return
    print(formatted_name, *sites)
    if not sites:
        sites = ['betclic', 'betstars', 'bwin', 'france_pari', 'joa', 'netbet',
                 'parionssport', 'pasinobet', 'pmu', 'unibet', 'winamax', 'zebet']
    selenium_sites = {"betstars", "bwin", "joa", "parionssport", "pasinobet", "unibet"}
    requested_selenium_sites = selenium_sites.intersection(sites)
    # The caller's name must be read from this very frame: moving this lookup
    # into a helper or a comprehension would inspect the wrong frame.
    called_from_module = inspect.currentframe().f_back.f_code.co_name == "<module>"
    selenium_required = (called_from_module
                         and (requested_selenium_sites or not sites))
    # One driver per site, addressed as DRIVER[site], like the other parsing
    # functions of this file.
    # NOTE(review): confirm against selenium_init that the per-site API is
    # the current one.
    if selenium_required:
        for site in requested_selenium_sites:
            selenium_init.start_selenium(site)
    res_parsing = {}
    try:
        for site in sites:
            if len(sites) > 1:
                print(site)
            url = get_competition_by_id(_id, site)
            try:
                if url:
                    try:
                        res_parsing[site] = parse(site, url)
                    except urllib3.exceptions.MaxRetryError:
                        # Restart the driver of this site and retry once; a
                        # failure of the retry is reported by the handlers
                        # below.
                        selenium_init.DRIVER[site].quit()
                        print("Redémarrage de selenium")
                        selenium_init.start_selenium(site)
                        res_parsing[site] = parse(site, url)
            except urllib.error.URLError:
                print("Site non accessible (délai écoulé)")
            except KeyboardInterrupt:
                # An interrupt only discards the current site's odds.
                res_parsing[site] = {}
            except selenium.common.exceptions.TimeoutException:
                print("Element non trouvé par selenium")
            except sportsbetting.UnavailableCompetitionException:
                print("Compétition non disponible")
    finally:
        if selenium_required:
            for site in requested_selenium_sites:
                selenium_init.DRIVER[site].quit()
    res = format_team_names(res_parsing, sport)
    out = valid_odds(merge_dict_odds(res), sport)
    if not called_from_module:
        return out
    sportsbetting.ODDS[sport] = out
def parse_buteurs():
    """
    Collect the odds of the goalscorer duels available on Betclic.

    The merged odds are returned when the direct caller is not module-level
    code; otherwise they are stored in ``sportsbetting.ODDS["buteurs"]`` and
    None is returned.
    """
    odds_per_league = []
    for league in ("france ligue 1", "espagne liga", "italie serie",
                   "allemagne bundesliga"):
        formatted_name = get_id_formatted_competition_name(league, "football")[1]
        print(formatted_name)
        betclic_url = get_competition_url(league, "football", "betclic")
        odds_per_league.append(parse_buteurs_betclic(betclic_url))

    # The caller lookup has to stay in this frame to inspect the real caller.
    called_from_module = inspect.currentframe().f_back.f_code.co_name == "<module>"
    merged_odds = merge_dicts(odds_per_league)
    if called_from_module:
        sportsbetting.ODDS["buteurs"] = merged_odds
        return None
    return merged_odds