/
scrape_bridge_nl.py
64 lines (44 loc) · 2.04 KB
/
scrape_bridge_nl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python3
"""
Scrape results and board details from bridge.nl
"""
__author__ = "Joost Langeveld"
__license__ = "MIT"
import bs4
import pandas as pd
import re
from util import get_browser, get_soup
_default_browser = None


def _get_default_browser():
    """Return the shared fallback browser, creating it on first use."""
    global _default_browser
    if _default_browser is None:
        _default_browser = get_browser()
    return _default_browser


def scrape_cards_bridge_nl(url: str, browser=None) -> dict:
    """
    Scrape the hands shown on a single bridge.nl page.

    Args:
        url: Address of the page to load.
        browser: Browser object handed to ``get_soup``. When omitted, one
            shared browser is obtained from ``get_browser()`` on the first
            such call and reused afterwards.

    Returns:
        A dict keyed by wind letter, taken in the order 'N', 'W', 'E', 'S'
        and paired with the hand tags in the order they occur on the page.
        Each value is the list of text lines 1 through 4 of that tag
        (presumably one line per suit -- TODO confirm against the page).
        If the page has fewer than four hand tags, the dict has fewer keys.
    """
    # A default value is evaluated once, when the ``def`` statement runs, so
    # ``browser=get_browser()`` would acquire a browser at import time even if
    # every caller passes its own. Resolve the fallback lazily instead.
    if browser is None:
        browser = _get_default_browser()

    winds_bridge_nl = 'NWES'
    soup = get_soup(browser=browser, url=url)
    hand_tags = soup.find_all('div', class_='vierkant33procent spelverdeling_hand')

    # Index 0 of the split text is skipped; only lines 1..4 are kept.
    return {
        wind: hand_tag.text.split('\n')[1:5]
        for wind, hand_tag in zip(winds_bridge_nl, hand_tags)
    }
# Every href taken from a scraped page is appended to this prefix.
bridge_nl_url_prefix = 'https://uitslagen.bridge.nl/ords/'
bridge_nl_overview_url_suffix = 'f?p=300:2010:8013769507611::NO:RP:P0_VER_ID_NBB:2043'
bridge_nl_overview_url = bridge_nl_url_prefix + bridge_nl_overview_url_suffix

# A result row is followed when its link text contains this name
# (compared case-insensitively).
player_name = 'Joost Langeveld'

# Stage 1: load the overview page and pick a tournament link.
url = bridge_nl_overview_url
normal_browser = get_browser()
soup = get_soup(browser=normal_browser, url=url)

a_tournament_tags = soup.find_all('td', class_='t-Report-cell', headers="STAND")
tournament_url = None
for a_tournament_tag in a_tournament_tags:
    # Each iteration overwrites the previous one: the last row wins.
    tournament_url = bridge_nl_url_prefix + a_tournament_tag.find('a')['href']
if tournament_url is None:
    raise RuntimeError(f'No tournament links found at {url}')

# Stage 2: load that tournament and look for the player's result link.
url = tournament_url
soup = get_soup(browser=normal_browser, url=url)

a_result_tags = soup.find_all('td', class_='t-Report-cell', headers="NAAM_000")
wanted_name = player_name.lower().strip()
player_result_url = None
for a_result_tag in a_result_tags:
    result_link = a_result_tag.find('a')
    result_name = result_link.text
    result_url = bridge_nl_url_prefix + result_link['href']
    if wanted_name in result_name.lower():
        # No break: if several rows match, the last match is used.
        player_result_url = result_url
if player_result_url is None:
    raise RuntimeError(f'No result for player {player_name!r} found at {url}')

# Stage 3: load the player's result page and print the cards of each board.
url = player_result_url
soup = get_soup(browser=normal_browser, url=url)

# NOTE(review): in this pattern the trailing `*` applies to the final `R`
# only, i.e. it matches 'SPELNUMME' followed by any number of 'R'. If a
# prefix match on 'SPELNUMMER' was intended, the `*` is superfluous -- confirm.
tags = soup.find_all('td', class_='t-Report-cell', headers=re.compile('SPELNUMMER*'))
for tag in tags:
    url = bridge_nl_url_prefix + tag.find('a')['href']
    print(scrape_cards_bridge_nl(url, normal_browser))