Exemplo n.º 1
0
def update_scoreboard():
    """Scrape the 2022 NCAA tournament results page and write a scoreboard file.

    Visits sports-reference.com, snapshots the bracket HTML, extracts each
    game's winner plus its date/hour, and writes one line per (day, hour)
    bucket to ``corefolder/scoreboard2022.txt`` in the form
    ``d<day>h<hour>:<winner1>,<winner2>,...,<date>``.
    """
    with browser.Browser(headless=True) as b:
        b.visit(
            "https://www.sports-reference.com/cbb/postseason/2022-ncaa.html")
        scoreboard_html = b.html_snapshot()
    # Read the snapshot under a context manager so the handle is closed
    # even if parsing raises (the original leaked it on error).
    with open(scoreboard_html) as f1:
        all_text = f1.read()
    winners = {}
    for longwinner in re.findall(r"<div class=\"winner\".*?</a>.*?</a>",
                                 all_text, re.DOTALL):
        # Second-to-last anchor text is the winning team's name.
        winner = re.findall(r">.*?</a>", longwinner)[-2][1:-4]
        # Last href carries the game's YYYY-MM-DD-HH timestamp.
        longdate = re.findall(r"<a href=\".*?>", longwinner)[-1]
        date = re.search(r"\d{4}-\d\d-\d\d-\d\d", longdate)[0]
        # Units digit of the month is enough to distinguish March (3) from
        # April (4); April days are offset by 31 to keep days monotonic.
        game_month = int(date[-8])
        game_day = int(date[-5:-3]) + (31 if game_month == 4 else 0)
        game_hour = date[-2:]
        key = str(game_day) + game_hour
        # Membership test instead of try/except KeyError — same behavior,
        # clearer intent.
        if key in winners:
            winners[key] += "," + winner
        else:
            winners[key] = winner
    with open("corefolder/scoreboard2022.txt", 'w') as f2:
        day = -1
        last_date = 0
        for key in sorted(winners):
            hour = key[-2:]
            # NOTE(review): assumes the day portion is always two digits —
            # true for tournament dates (all >= day 10 after offset); confirm.
            date = int(key[:2])
            if date > last_date:
                last_date = date
                day += 1
            f2.write("d" + str(day) + "h" + hour + ":" + winners[key] +
                     ',' + str(date) + '\n')
Exemplo n.º 2
0
def update_kp():
    """Scrape the kenpom.com front page and write team ratings to a file.

    Snapshots the kenpom home page, finds the "Data through <date>" header to
    derive the day number, then writes ``corefolder/kp<day>.txt`` with one
    ``team,rating,tempo`` row per team.
    """
    with browser.Browser(headless=True) as b:
        b.visit("https://kenpom.com")
        kp_html = b.html_snapshot()
    f2 = None
    try:
        with open(kp_html) as f1:
            for line in f1:
                if "update\">Data" in line:
                    longdate = re.search(r"Data.*?</a>", line)[0]
                    date = re.search(r"\d{4}-\d\d-\d\d", longdate)[0]
                    # Units digit of the month; April days offset by 31.
                    game_month = int(date[-4])
                    game_day = int(date[-2:]) + (31 if game_month == 4 else 0)
                    # Close any previously opened output before rebinding
                    # (the original leaked the old handle here).
                    if f2 is not None:
                        f2.close()
                    f2 = open("corefolder/kp" + str(game_day) + ".txt", 'w')
                    f2.write("team,rating,tempo\n")
                # Guard on f2: the original crashed with AttributeError if a
                # team row appeared before the "Data through" header line.
                if "a href=\"team.php" in line and f2 is not None:
                    teamname = re.search("a href=\"team.php.*?</a>",
                                         line)[0].split('>')[1][:-3]
                    kp = re.search(r"[+-]\d\d?\.\d\d", line)[0]
                    kptempo = re.findall(r"\d\d?\.\d", line)[3]
                    f2.write(teamname + ',' + kp + ',' + kptempo + '\n')
    finally:
        # Ensure the output file is closed even if parsing raises.
        if f2 is not None:
            f2.close()
Exemplo n.º 3
0
    loadsData = json.loads(temp.read())

# Filter out entries whose 'pricepos_code' has already been verified.
if CheckDataItems():

    if loadsData:
        # Iterate a copy so removing from `data` during the loop is safe.
        fdata = data.copy()
        with open('Verified.json', 'r') as temp:
            VerifiedData = json.loads(temp.read())
        with open(nameData, 'r') as temp:
            items = json.loads(temp.read())
        for item in fdata:
            # BUG FIX: list.index() raises ValueError when the value is
            # absent — it never returns -1 — so the original `!= -1` check
            # was dead and any missing code crashed the loop. Use a
            # membership test instead.
            if item['pricepos_code'] in VerifiedData:
                # BUG FIX: the original removed the code string from `data`,
                # but `data` evidently holds dicts (item['pricepos_code']),
                # so that remove() would raise ValueError. Remove the entry
                # itself — TODO confirm against the caller's data shape.
                data.remove(item)

# NOTE(review): rebinding the name `browser` shadows the imported module, so
# `browser.Browser(...)` is unreachable after this line — consider renaming
# the instance (e.g. `b`) if anything later needs the module again.
browser = browser.Browser('chrome', incognito=True)

# Persist each remaining entry: append it to the items file and record its
# code in Verified.json, pausing between iterations.
for item in data:
    # BUG FIX: the original called items.append() with no argument, which
    # raises TypeError — presumably the current item was meant. TODO confirm.
    items.append(item)
    # Context managers guarantee each file is flushed and closed; the
    # original rebound `temp` without closing the nameData handle.
    with open(nameData, 'w') as temp:
        temp.write(json.dumps(items))
    VerifiedData.append(item['pricepos_code'])
    with open('Verified.json', 'w') as temp:
        temp.write(json.dumps(VerifiedData))
    # Throttle between writes — presumably to pace some external action.
    time.sleep(2)
def getBfromP(groupID: str, path="html_sources/", year="2019", entry_limit=40):
    """Download every bracket entry page in a public ESPN Tournament
    Challenge group and save each one as an HTML file under *path*.

    Args:
        groupID: ESPN group identifier; also used as the default folder name.
        path: Output directory; when left at the default, ``groupID/`` is
            appended to it.
        year: Tournament year used to build ESPN URLs.
        entry_limit: Refuse to run on groups larger than this many entries.

    Raises:
        ValueError: If the group page is private, or the group exceeds
            *entry_limit* entries.
    """
    print("Connecting...", end=' ')
    with browser.Browser(headless=True) as b:
        if path == "html_sources/":
            path += groupID + '/'
        myurl = ("http://fantasy.espn.com/tournament-challenge-bracket/" +
                 str(year) + "/en/group?groupID=" + groupID + "&_301_=2022")
        b.visit(myurl)
        group_html = b.html_snapshot()
        with open(group_html) as f:
            group_text = f.read()
            try:
                # Accessibility probe: re.search returns None on a private
                # pool, so subscripting it raises TypeError, which we
                # translate into a clearer error. The value is discarded.
                re.search(r"<header class=\"group-header\">.*?</header>",
                          group_text)[0][29:-9] + '/'
            except TypeError:
                raise ValueError(
                    "This pool is not publicly accessible, therefore the program cannot run"
                )
        mylinks = [
            link.value
            for link in b.links.find_by_partial_href("entry?entryID=")
        ]
        if len(mylinks) > entry_limit:
            # BUG FIX: the original concatenated the int len(mylinks) into
            # the message, raising TypeError instead of this ValueError.
            raise ValueError(
                "This program is intended for small groups. It is too expensive to visit "
                + str(len(mylinks)) + " URLs.")
        total = str(len(mylinks))
        for linkcount, link in enumerate(mylinks, start=1):
            # Print progress, starting a new line every 10 entries.
            if linkcount % 10 == 1:
                print("\nProgress: " + str(linkcount) + '/' + total, end=' ')
            else:
                print(str(linkcount) + '/' + total, end=' ')
            try:
                b.links.find_by_text(link).click()
                # Some entries redirect to a "game" URL; rebuild the direct
                # entry URL from the entryID and visit it instead.
                if "game" in b.url:
                    ID = re.search(r"entryID=\d*", b.url)[0]
                    urlBase = ("http://fantasy.espn.com/tournament-challenge-bracket/"
                               + str(year) + "/en/entry?")
                    b.visit(urlBase + ID)
                notFound = False
            except splinter.exceptions.ElementDoesNotExist:
                print('\n' + link, "could not be found")
                notFound = True
            screenshot_path = b.html_snapshot()
            with open(screenshot_path) as f1:
                entryName = b.title.split(' -')[0]
                if notFound:
                    entryName = link
                # Entry names become filenames; '/' would nest directories.
                entryName = entryName.replace('/', '_')
                content = (f1.read() if not notFound
                           else link + " could not be found")
                try:
                    with open(path + entryName, 'w') as f2:
                        f2.write(content)
                except (FileNotFoundError, NotADirectoryError):
                    # Output directory missing: create all intermediate
                    # directories in one call, then retry the write.
                    os.makedirs(path, exist_ok=True)
                    with open(path + entryName, 'w') as f2:
                        f2.write(content)
            # Return to the group page before clicking the next entry link.
            b.visit(myurl)
        print('\n', end='')
Exemplo n.º 5
0
# -*- coding: utf-8 -*-
"""
@CREATETIME: 05/07/2018 14:04 
@AUTHOR: Chans
@VERSION: 
"""

from selenium import webdriver
from splinter import browser
import sys

# Debug aid: show where Python resolves imports from (useful when the
# chromedriver/splinter setup below misbehaves).
print(sys.path)

# chromedriver = '/Users/Apple/datadata/chrome/chromedriver'
# driver = webdriver.Chrome(chromedriver)
# Smoke test: launch a Chrome-driven splinter browser and load a page.
b = browser.Browser('chrome')
b.visit('https://www.baidu.com')