示例#1
0
def test_write_h5(hdf5_tempfile):
    """HDF5 output is unsupported: write() must raise NotImplementedError."""
    rows = [
        ["1", "A towel,", "1.0"],
        ["42", " it says, ", "2.0"],
        ["1337", "is about the most ", "-1"],
        ["0", "massively useful thing ", "123"],
        ["-2", "an interstellar hitchhiker can have.\n", "3"],
    ]
    with pytest.raises(NotImplementedError):
        write(hdf5_tempfile, rows)
示例#2
0
def test_write_csv(csv_tempfile):
    """Round-trip a table of strings through the CSV writer and reader."""
    newline = "\n"
    table = [
        ["1", "A towel,", "1.0"],
        ["42", " it says, ", "2.0"],
        ["1337", "is about the most ", "-1"],
        ["0", "massively useful thing ", "123"],
        ["-2", "an interstellar hitchhiker can have.\n", "3"],
    ]
    write(csv_tempfile, table, newline=newline)
    assert read(csv_tempfile, newline=newline) == table
示例#3
0
def test_write_pickle_protocol(pickle_tempfile):
    """Round-trip a nested dict through pickle using explicit protocol 0."""
    payload = {
        "a list": [1, 42, 3.141, 1337, "help", "€"],
        "a string": "bla",
        "another dict": {"foo": "bar", "key": "value", "the answer": 42},
    }
    write(pickle_tempfile, payload, protocol=0)
    assert read(pickle_tempfile) == payload
示例#4
0
def test_write_json(json_tempfile):
    """Round-trip a nested dict through the JSON writer and reader."""
    payload = {
        "a list": [1, 42, 3.141, 1337, "help", "€"],
        "a string": "bla",
        "another dict": {"foo": "bar", "key": "value", "the answer": 42},
    }
    write(json_tempfile, payload)
    assert read(json_tempfile) == payload
示例#5
0
def _scrape_game_ids(url):
    """Open *url* in a fresh Chrome session and return the parsed game IDs.

    Extracted from getGameID(): the regular-season loop and the bowl-games
    section ran this identical sequence twice.
    """
    # Raw string: the original 'C:\webdrivers\...' relied on "\w" and "\c"
    # being invalid escapes that happen to stay literal.
    browser = webdriver.Chrome(r'C:\webdrivers\chromedriver.exe')
    browser.set_window_size(1120, 550)
    browser.get(url)
    source = browser.page_source
    browser.close()

    soup = BS(source, 'html5lib')
    links = soup.find_all('a', class_="mobileScoreboardLink")
    return parse([str(link) for link in links])


def getGameID():
    """Scrape ESPN's 2017 college-football scoreboard for every game ID.

    Collects regular-season weeks 1-15 (seasontype 2) plus the bowl games
    (seasontype 3) and writes the mapping to ``gameIDS.json`` via
    ``io.write``. Returns nothing.
    """
    #http://www.espn.com/college-football/scoreboard/_/group/80/year/2017/seasontype/2/week/1
    urlFirst = "http://www.espn.com/college-football/scoreboard/_/group/80/year/2017/seasontype/2/week/"
    gameIDS = {}

    # Regular season: the original while-loop covered counter 1..15.
    for week in range(1, 16):
        gameIDS["Week " + str(week)] = _scrape_game_ids(urlFirst + str(week))

    # Postseason: the bowl-game scoreboard lives under seasontype 3.
    gameIDS["Bowls"] = _scrape_game_ids(
        "http://www.espn.com/college-football/scoreboard/_/group/80/year/2017/seasontype/3/week/1"
    )

    io.write('gameIDS.json', gameIDS)
示例#6
0
def test_write_json_params(json_tempfile):
    """JSON round-trip survives explicit formatting parameters."""
    payload = {
        "a list": [1, 42, 3.141, 1337, "help", "€"],
        "a string": "bla",
        "another dict": {"foo": "bar", "key": "value", "the answer": 42},
    }
    write(
        json_tempfile,
        payload,
        indent=4,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=False,
    )
    assert read(json_tempfile) == payload
示例#7
0
def test_write_jsonl(jsonl_tempfile):
    """Round-trip a list of heterogeneous objects through JSON Lines."""
    records = [
        {"some": "thing"},
        {"foo": 17, "bar": False, "quux": True},
        {"may": {"include": "nested", "objects": ["and", "arrays"]}},
    ]
    write(jsonl_tempfile, records)
    assert read(jsonl_tempfile) == records
示例#8
0
        except:
            # presumably pads two missing fields with blanks so the CSV
            # columns stay aligned — the index meanings are not visible
            # from this fragment; TODO confirm against the full script
            temp.append(" ")
            temp.append(" ")
        try:
            temp.append(i[0][2][5])
        except:
            print(" ")
        temp.append(i[0][8][1])
        try:
            temp.append(i[0][6][1])
        except:
            temp.append(" ")
        winnersFinal.append(temp)
    except:
        # NOTE(review): drops into the debugger on any unexpected row shape —
        # leftover debugging aid, halts unattended runs.
        pdb.set_trace()
mi.write('gameWinnersTable.csv', winnersFinal)

# Collect [loser, game_id] pairs for every game that records a 'Loser' key;
# games without one are silently skipped by the bare except.
for i in mainDict:
    temp = []
    try:
        temp.append(mainDict[i]['Loser'])
        temp.append(i)
        losersFirst.append(temp)
    except:
        pass
losersFinal = []
losersFinal.append(header)

for i in losersFirst:
    try:
        temp = []
示例#9
0
import mpu.io as mi
import pdb

# Expand the game -> teams participation dict into one [game, team] pair per
# participating team and write the CSV join table.
mainDict = mi.read('gameParticipationTableDictionary.json')

# NOTE(review): a live pdb.set_trace() breakpoint that halted every run was
# removed here as a debugging leftover.
listOfParticipation = []
for game_no in mainDict:
    for team in mainDict[game_no]:
        listOfParticipation.append([game_no, team])

# Debug dump of the pairs; context manager closes the handle (the original
# leaked it).
with open('output.txt', 'w') as f:
    for pair in listOfParticipation:
        print(pair, file=f)

header = ['Game_No', 'Team_ID']

finalList = [header]
finalList.extend(listOfParticipation)

mi.write('team_played_inTable.csv', finalList)
    url_to_put_to = format_endpoint(year)

    # NOTE(review): `url_to_put_to` is computed but never used below — the
    # GET goes to `url`, which is not assigned anywhere in this view. Looks
    # like a half-applied rename; verify which endpoint is intended.
    soup = BeautifulSoup(requests.get(url).text, features="html.parser")

    #get all of the divisions we are interested in
    #the website uses identical ids for different things, so we will need to deal with that

    first_layer_divs = soup.find_all("div", attrs={"class": "conference"})
    for div in first_layer_divs:
        #let us package this up in something nice... a small class would be usefull

        conference_name = div.find("h1").text
        # Record each team's site id, taken from the second-to-last URL path
        # segment of its link.
        for x in div.find_all("a"):
            ids[x.text] = x['href'].split("/")[-2]
        teams = [team.text for team in div.find_all("a")]
        for x in teams:
            #add them to the list of things to post later
            relationships_to_add.append(Data_holder(year, conference_name, x))

# Side-effecting sweep: handle_year populates `ids` and `relationships_to_add`.
[handle_year(x) for x in range(2009, 2020)]
mi.write("team_ids.json", data=ids)
# NOTE(review): exit(1) terminates the script here, so the PUT loop below is
# dead code — presumably left disabled on purpose while only the ID dump was
# needed. Confirm before re-enabling.
exit(1)
#go ahead and send them to the API
for x in relationships_to_add:
    url = format_endpoint(x.year)
    requests.put(url,
                 json={
                     "conference_name": x.conference_name,
                     "team_name": x.team_name
                 })
#this program will fix the formatting of the player tables (insert pseudo-id's)
import mpu.io as mi
import random

# Player table keyed by team; each value is expected to be a list of player
# rows (lists). Rows with exactly 9 fields lack the pseudo player id.
playerTableDict = mi.read('playerTableDict.json')

# NOTE(review): this handle is never written to or closed; opening it still
# truncates output.txt as a side effect — confirm that is intentional.
f = open('output.txt', 'w')
count = 10000000  # pseudo ids start high to avoid colliding with real ids
for i in playerTableDict:
    try:
        for j in playerTableDict[i]:
            if (len(j) == 9):
                # Insert the generated id as the 9th column (index 8).
                j.insert(8, str(count))
                count += 1

    except:
        # Bare except: best-effort — skips entries whose value is not a list
        # of rows (e.g. len()/insert() failing on unexpected shapes).
        continue
mi.write('fixedPlayerTableDict.json', playerTableDict)

# Debug pass: print the (now fixed) length of every row.
for i in playerTableDict:
    try:
        for j in playerTableDict[i]:
            print(len(j))
    except:
        continue
import mpu.io as mi

# Translate every game's participant entries into team ids using the team
# lookup table, then persist the result.
participationDict = mi.read('TeamsAndID.json')
teamDict = mi.read('revisedTeamDict.json')

tableDict = {}
for game_no, participants in participationDict.items():
    tableDict[game_no] = [teamDict[participants[key]] for key in participants]

mi.write('gameParticipationTableDictionary.json', tableDict)
        rosterDict['Miami (OH)'] = rosterDict.pop(i)
    elif i == 'Mississippi':
        rosterDict['Ole Miss'] = rosterDict.pop(i)
    elif i == 'North Carolina State':
        rosterDict['NC State'] = rosterDict.pop(i)
    elif i == 'UTSA':
        rosterDict['UT San Antonio'] = rosterDict.pop(i)

# Collect, per team, the 2017 roster entries. Teams without a usable '2017'
# entry get an empty list (the bare except swallows the lookup failure).
player_played_in = {}
for team in rosterDict:
    roster = []
    try:
        for player in rosterDict[team]['2017']:
            roster.append(player)
    except:
        pass
    player_played_in[team] = roster
mi.write('player_played_intemp.json', player_played_in)

games = mi.read('TeamsAndID.json')

# Replace each game's team references with that team's roster list.
player_played_in_table = {}
for game_no in games:
    entry = games[game_no]
    player_played_in_table[game_no] = [
        player_played_in[entry[key]] for key in entry
    ]

mi.write('player_played_in_table.json', player_played_in_table)

if __name__ == '__main__':
    #example for endpoint "http://localhost:3000/players/team?school=Clemson&year=2019"
    #i'm going to establish a pho list of statistics to see what is required by the database schema and what needs to be added

    # NOTE(review): both `break` statements below stop after the first
    # conference of the first year — this looks like sampling scaffolding
    # for schema discovery, not a full crawl; confirm before removing.
    for x in [y for y in range(2009, 2020)]:
        conferences = get_conferences(x)
        #now that we have the conferences, we need to get the teams
        for conf_data in conferences:
            name = conf_data['name']
            year = conf_data['year']
            teams = get_teams_by_conference(name, year)
            for team_data in teams['teams']:
                #now we need to get the team_id, which is stored locally in the script
                team_id = teams_id[team_data['name']]
                team_name = team_data['name']
                team_year = team_data['year']
                result = get_players_by_team_name_and_year(
                    team_name, team_year).json()
                for player_data in result:
                    # presumably gates on whether this player id is already
                    # known — verify check_if_player_id_exists semantics
                    if (check_if_player_id_exists(team_name, team_year,
                                                  player_data['player_id'])):
                        player_source = get_player_home_index_page(
                            team_id, player_data['player_id'])
                        parse_player_home_index_page(player_source)
            break
        break

    mio.write('fields_for_schema.json', fields_for_schema)
示例#15
0
    if (i not in listOfteams):
        listOfFlags.append(i)

revisedDict = mi.read('teamDict.json')
#pdb.set_trace()

# Canonical-name fixes: scraped name (key) -> name used by the score data.
_RENAMES = {
    "Connecticut": "UConn",
    "Florida International": "Florida Intl.",
    "Louisiana-Lafayette": "Louisiana",
    "Louisiana-Monroe": "Louisiana Monroe",
    "Massachusetts": "UMass",
    "Miami (Florida)": "Miami",
    "Miami (Ohio)": "Miami (OH)",
    "Mississippi": "Ole Miss",
    "North Carolina State": "NC State",
    "UTSA": "UT San Antonio",
}

# Iterate over a snapshot of the keys: the original popped and inserted keys
# while iterating the dict itself, which risks a RuntimeError and an
# undefined visitation order.
for i in list(revisedDict):
    if (i in listOfFlags):
        print("Checking", i)
        if i in _RENAMES:
            revisedDict[_RENAMES[i]] = revisedDict.pop(i)
mi.write('revisedTeamDict.json', revisedDict)
#pdb.set_trace()
示例#16
0
#iterating through the dictionary
# For every game id, extract the winner's and loser's stat rows from the
# page source and collect them under the game id.
gameTableData = {}
for i in dictInfo:
    source = getSource(i)
    # Reset per game: the original reused WinnerData/LoserData from the
    # previous iteration whenever getTeam failed, silently attaching the
    # wrong team's stats to this game (and raising NameError on a
    # first-iteration failure).
    WinnerData = None
    LoserData = None
    try:
        WinnerData = GSD.getTeam(source, dictInfo[i]['Winner'])
    except:
        print("game", i, "fail")
    try:
        LoserData = GSD.getTeam(source, dictInfo[i]['Loser'])
    except:
        print("game", i, "fail")
    tempDict = {}
    try:
        # AttributeError on None when the lookup failed -> winner omitted.
        WinnerData.insert(0, dictInfo[i]['Winner'])
        tempDict['Winner'] = WinnerData
    except:
        pass
    try:
        LoserData.insert(0, dictInfo[i]['Loser'])
        tempDict['Loser'] = LoserData
    except:
        pass
    gameTableData[i] = tempDict

mi.write('gameTableData.json', gameTableData)
                            newList.append(fourth)
                        thingToReplace[0] = newList
                        tempTwoDict[temp] = thingToReplace
                except:
                    pdb.set_trace()
        except:
            pdb.set_trace()

        tempOneDict[j] = tempTwoDict

    newDict[i] = tempOneDict

for i in newDict:
    pass

mi.write('testingDict.json', newDict)

testingOutput = open('newTest.txt', 'w')
for i in newDict:
    for j in newDict[i]:
        for k in newDict[i][j]:
            #print(k, file = testingOutput)
            print("INSERT INTO",
                  k,
                  "(",
                  ",".join(newDict[i][j][k][0]),
                  ",",
                  "PlayerID",
                  ")",
                  file=testingOutput)
            print('VALUES(',
    tableHeaderSoup = BS(str(tableRows[0]), 'lxml')
    tableDataSoup = BS(str(tableRows[-2]), 'lxml')

    tableHeaderData = tableHeaderSoup.find_all('th')
    tableHeaders = []
    for i in tableHeaderData:
        tableHeaders.append(i.text)

    tableData = tableDataSoup.find_all('td')
    tableDatas = []
    for i in tableData:
        tableDatas.append(i.text)

    return [tableHeaders, tableDatas]


myDict = mi.read('teamDict.json')

# Scrape each team's stat tables; "Liberty" is skipped (see original note:
# not a 2017 team).
finalList = {}
for team in myDict:
    if team == "Liberty":
        continue
    urls = getUrls(myDict[team])
    tables = {}
    for label in urls:
        page_soup = getSoup(urls[label])
        tables[label] = getTableHeaders(page_soup)
    finalList[team] = tables
mi.write('finalTeamTableDict.json', finalList)
示例#19
0
#initialize edges for Bellman-Ford Algorithm
edges = initializeEdges(testGraph)
outputFord = bellmanFord(testGraph, edges, source)

#

# Report distances and reconstructed paths on the shared output stream `f`.
outputDistances("Bellman-Ford Algorithm", outputFord['Distances'], f, source)
outputPathways("Bellman-Ford Algorithm", outputFord['Previous'], f, source)
yORn = getUserInput(
    "Would you like the 'distance' and 'previous' dictionaries written to seperate .json files? (yes or no): "
)

# Persist both algorithms' results: via mpu when the user opted into it
# earlier (mpuYesOrNo), otherwise via the json module directly. Any other
# mpuYesOrNo value writes nothing.
if (yORn == 'yes'):
    if (mpuYesOrNo == 'yes'):
        mi.write('outPutBellmanFord.json', outputFord)
        mi.write('outputForDijkstra.json', output)
        print(
            'Dijkstra Results: outputForDijkstra.json\nBellman-Ford Results: outPutBellmanFord.json'
        )

    elif (mpuYesOrNo == 'no'):
        with open('outPutBellmanFord.json', 'w') as wj:
            json.dump(outputFord, wj, indent=10)
        with open('outputForDijkstra.json', 'w') as wj:
            json.dump(output, wj, indent=10)
        print(
            'Dijkstra Results: outputForDijkstra.json\nBellman-Ford Results: outPutBellmanFord.json'
        )
yORn = getUserInput(
示例#20
0
# Build the player CSV. NOTE(review): 'heigh' is a typo for 'height' kept
# as-is because downstream loaders may key on the existing column name.
PlayerList = []
PlayerList.append([
    'Number', 'Last Name', 'First Name', 'Position', 'Year', 'heigh', 'weight',
    'hometown', 'Highschool', 'Player_ID', 'Team_ID'
])
# NOTE(review): a live pdb.set_trace() breakpoint that halted every run was
# removed here as a debugging leftover.
for i in myDict:

    try:
        for j in myDict[i]:
            # Split "Last, First" into two columns at positions 1 and 2.
            names = j[1].split(",")
            del j[1]
            j.insert(1, names[1])
            j.insert(1, names[0])
            # Convert height "F-I" (feet-inches) into F'I notation.
            heigh = j[5].split("-")
            del j[5]
            j.insert(5, "'".join(heigh))
            PlayerList.append(j)
    except:
        pass  #will skip liberty (not a team in 2017, but existed on the wikepedia scrape)

# NOTE(review): handle is unused and never closed — kept only because opening
# it truncates output.txt as a side effect; confirm that is needed.
f = open('output.txt', 'w')

mi.write('PlayerTable.csv', PlayerList)
示例#21
0
import mpu.io as mi

# Flatten each game's two team score-lines into one CSV row.
myDict = mi.read('gameTabelData.json')

listOfGames = [[
    'game_ID', 'Team one', 'Team two', 'Q1', 'Q2', 'Q3', 'Q4', 'OT',
    'Matchup', 'Game Type'
]]
for game_id in myDict:
    record = myDict[game_id]
    row = [game_id]
    row.extend(record[1])
    row.extend(record[2])
    # Pad short rows so every row has the same column count.
    if len(row) != 8:
        row.append(" ")
    # NOTE(review): this one game id gets an extra pad — reason not visible
    # here, preserved as-is.
    if game_id == '400941816':
        row.append(" ")
    row.append(record[3])
    row.append(record[4])
    listOfGames.append(row)
f = open('output.txt', 'w')

mi.write('gameTable.csv', listOfGames)
     
print(teams, file=f)
# NOTE(review): a live pdb.set_trace() breakpoint that halted every run was
# removed here as a debugging leftover.

#Loop Through the list and get the data
# Map every game id to its {Winner, Loser} pair by matching the game's team
# list against the known `teams` collection.
for i in GameTable:
    temp = GameTable[i][1]

    #see if the team exists in the database.
    #Temporary dictionary to hold winner and loser
    tempDict = {}

    #teams are organized in a way such that the winner is the first and the loser is the second
    #so the counter will be used to determine position when we get a hit on the if (is in) statement
    # NOTE(review): counter advances on every element, matched or not, so
    # only absolute positions 0/1 can ever be recorded — a first element not
    # present in `teams` means no Winner is stored. Confirm intended.
    counter = 0

    #loops through the list of teams
    for j in temp:
        if (j in teams):
            if (counter == 0):
                tempDict['Winner'] = j
            elif (counter == 1):
                tempDict['Loser'] = j
        counter += 1

    #saves the winner and the loser dictionary to the overall dictionary under the game ID
    #used to find statistics.
    TeamsAndID[i] = tempDict

#save the created dictionary for revision
mi.write('TeamsAndID.json', TeamsAndID)
import mpu.io as mi 
import pdb

# Build the rushing-stats CSV: header columns taken from Air Force's table
# (index 4 onward), then one row per team with its team id appended.
teamDict = mi.read('teamDict.json')
teamStatsDict = mi.read('finalTeamTableDict.json')
# NOTE(review): two live pdb.set_trace() breakpoints were removed from this
# script as debugging leftovers.

# Sanitize header names: drop '.', '/' -> '_', 'Int' -> 'Interception'
# (same substitution order as the original three passes).
passingList = teamStatsDict["Air Force"]["rushing"][0][4:]
passingList = [
    x.replace(".", "").replace("/", "_").replace("Int", "Interception")
    for x in passingList
]
newPassingList = [list(passingList)]
newPassingList[0].append("TEAM_ID")
for i in teamStatsDict: 
    temp = teamStatsDict[i]["rushing"][1][4:]
    temp.append(teamDict[i])
    newPassingList.append(temp)

mi.write('teamRushingStats.csv', newPassingList)



    
    
    
# Drop footnote entries (those starting with '[') from every table.
# The original called i.remove(j) while iterating i, which skips the element
# immediately after each removal and can leave '[' entries behind;
# rebuilding the list in place removes them all.
for i in listOfTables: 
    i[:] = [j for j in i if j[0] != '[']

# Column positions within each scraped table row.
teamName = 0
mascot = 1
city = 2
state = 3
conference = 4
ID = -1

# Reorder to the CSV layout: id first, then name/mascot/city/state/conference.
listToCSV = []
for i in listOfTables: 
    listToCSV.append([
        i[ID], i[teamName], i[mascot], i[city], i[state], i[conference],
    ])

mi.write('TeamTable.csv', listToCSV)
    

import getSources
import mpu.io as mi
import getTeamNanes
import getBowlGame
import cancelChecker as CC
import getScore

# For every scheduled game id, skip games whose source fails the cancel
# check and collect parsed score data (bowl games carry their bowl name).
dictionary = mi.read('gameIDS.json')
IDS = []
gameDataDict = {}
for week, game_ids in dictionary.items():
    for game_id in game_ids:
        source = getSources.GetSource(game_id)
        # presumably 1 == game was played; verify cancelChecker semantics
        if CC.canceled(source) != 1:
            continue
        bowlName = ""
        check = 0
        if week == 'Bowls':
            bowlName = getBowlGame.getBowlGame(source)
            check = 1

        TeamNames = getTeamNanes.parseTeamNames(source)
        important = getScore.getScores(source, TeamNames, bowlName, game_id,
                                       check)
        print(important)
        gameDataDict[str(game_id)] = important

mi.write('gameTabelData.json', gameDataDict)
示例#26
0
teamDict = mi.read('teamDict.json')

# Map team id -> 2017 roster ids (teams without a usable '2017' entry are
# skipped by the bare except, matching the original best-effort behaviour).
newDict = {}
for team in teamDict: 
    try: 
        roster_ids = [pid for pid in rosterDict[team]['2017']]
        newDict[teamDict[team]] = roster_ids
    except: 
        pass

#mi.write('teamAndPlayerIDS.json', newDict)
#pdb.set_trace()

# Scrape per-player statistics for every team id, printing the elapsed time
# of each batch.
playerStatisticsDict = {}
for team_id in newDict: 
    urls = generateURLs(team_id, newDict[team_id])
    start = time.time()
    elements = scraper.main(urls, newDict[team_id])
    print(time.time() - start)
    playerStatisticsDict[str(team_id)] = elements

mi.write('finalPlayerStatsDict.json', playerStatisticsDict)



示例#27
0
from mpu import io as mi 
import pdb

# Expand the per-game player lists into (Game_No, Player_id) rows and write
# the join-table CSV.
mainDict = mi.read('player_played_in_table.json')

finalList = []
header = ['Game_No', 'Player_id']
finalList.append(header)
# NOTE(review): a live pdb.set_trace() breakpoint that halted every run was
# removed here as a debugging leftover.
for game_no in mainDict: 
    for team_players in mainDict[game_no]: 
        for player_id in team_players: 
            finalList.append([game_no, player_id])

mi.write('player_played_inTable.csv', finalList)