def test_read_csv():
    path = "files/example.csv"
    source = pkg_resources.resource_filename(__name__, path)
    data_real = read(source)
    data_exp = [
        ["a", "b", "c"],  # 0
        ["1", "A towel,", "1.0"],  # 1
        ["42", " it says, ", "2.0"],  # 2
        ["1337", "is about the most ", "-1"],  # 3
        ["0", "massively useful thing ", "123"],  # 4
        ["-2", "an interstellar hitchhiker can have.\n", "3"],  # 5
        ["3.141", "Special char test: €üößł", "2.7"],  # 6
    ]
    assert len(data_real) == len(data_exp)
    # Compare row by row first so a failure points at the offending line.
    for row_real, row_exp in zip(data_real, data_exp):
        assert row_real == row_exp
    assert data_real == data_exp

    # skiprows drops the header row.
    data_real = read(source, skiprows=1)
    assert data_real == data_exp[1:]

    # Explicit delimiter and quotechar should give the same result.
    data_real = read(source, skiprows=1, delimiter=",", quotechar='"')
    assert data_real == data_exp[1:]
def test_write_jsonl_all_params(jsonl_tempfile):
    data = [
        {"some": "thing"},
        {"foo": 17, "bar": False, "quux": True},
        {"may": {"include": "nested", "objects": ["and", "arrays"]}},
    ]
    _write_jsonl(
        jsonl_tempfile,
        data,
        kwargs={
            "sort_keys": True,
            "separators": (",", ": "),
            "ensure_ascii": True,
        },
    )
    data_read = read(jsonl_tempfile)
    assert data == data_read
def test_write_csv(csv_tempfile):
    newline = "\n"
    data = [
        ["1", "A towel,", "1.0"],
        ["42", " it says, ", "2.0"],
        ["1337", "is about the most ", "-1"],
        ["0", "massively useful thing ", "123"],
        ["-2", "an interstellar hitchhiker can have.\n", "3"],
    ]
    write(csv_tempfile, data, newline=newline)
    data_read = read(csv_tempfile, newline=newline)
    assert data == data_read
def test_write_json(json_tempfile):
    data = {
        "a list": [1, 42, 3.141, 1337, "help", "€"],
        "a string": "bla",
        "another dict": {"foo": "bar", "key": "value", "the answer": 42},
    }
    write(json_tempfile, data)
    data_read = read(json_tempfile)
    assert data == data_read
def test_write_pickle_protocol(pickle_tempfile):
    data = {
        "a list": [1, 42, 3.141, 1337, "help", "€"],
        "a string": "bla",
        "another dict": {"foo": "bar", "key": "value", "the answer": 42},
    }
    write(pickle_tempfile, data, protocol=0)
    data_read = read(pickle_tempfile)
    assert data == data_read
def test_read_pickle():
    path = "files/example.pickle"
    source = pkg_resources.resource_filename(__name__, path)
    data_real = read(source)
    data_exp = {
        "a list": [1, 42, 3.141, 1337, "help", "€"],
        "a string": "bla",
        "another dict": {"foo": "bar", "key": "value", "the answer": 42},
    }
    assert data_real == data_exp
def test_write_jsonl(jsonl_tempfile):
    data = [
        {"some": "thing"},
        {"foo": 17, "bar": False, "quux": True},
        {"may": {"include": "nested", "objects": ["and", "arrays"]}},
    ]
    write(jsonl_tempfile, data)
    data_read = read(jsonl_tempfile)
    assert data == data_read
def test_write_json_params(json_tempfile):
    data = {
        "a list": [1, 42, 3.141, 1337, "help", "€"],
        "a string": "bla",
        "another dict": {"foo": "bar", "key": "value", "the answer": 42},
    }
    write(
        json_tempfile,
        data,
        indent=4,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=False,
    )
    data_read = read(json_tempfile)
    assert data == data_read
def test_read_csv_dicts():
    path = "files/example.csv"
    source = pkg_resources.resource_filename(__name__, path)
    data_real = read(source, format="dicts")
    data_exp = [
        {"a": "1", "b": "A towel,", "c": "1.0"},
        {"a": "42", "b": " it says, ", "c": "2.0"},
        {"a": "1337", "b": "is about the most ", "c": "-1"},
        {"a": "0", "b": "massively useful thing ", "c": "123"},
        {"a": "-2", "b": "an interstellar hitchhiker can have.\n", "c": "3"},
        {"a": "3.141", "b": "Special char test: €üößł", "c": "2.7"},
    ]
    assert len(data_real) == len(data_exp)
    assert data_real[0] == data_exp[0]
    assert data_real == data_exp
def test_read_jsonl():
    path = "files/example.jsonl"
    source = pkg_resources.resource_filename(__name__, path)
    data_real = read(source)
    data_exp = [
        {"some": "thing"},
        {"foo": 17, "bar": False, "quux": True},
        {"may": {"include": "nested", "objects": ["and", "arrays"]}},
    ]
    assert len(data_real) == len(data_exp)
    for real, exp_ in zip(data_real, data_exp):
        assert real == exp_
import mpu.io as mi
import pdb

pdb.set_trace()
rawCSV = mi.read('gameTable.csv')
f = open('checkingoutput.txt', 'w')

# Drop empty rows.
while [] in rawCSV:
    rawCSV.remove([])

valueOrder = rawCSV.pop(0)  # header row

newList = []
for i in rawCSV:
    temp = []
    for k in i:
        if k[0].isdigit():
            # Numeric fields go in unquoted.
            temp.append(k)
        elif k[0] == ' ':
            # Space-padded (blank) fields become NULL.
            temp.append('NULL')
        else:
            # Escape for SQL string literals.
            k = k.replace("&", "and")
            k = k.replace("'", "''")
            temp.append("'" + k + "'")
    newList.append(temp)

p = open('insertGames.sql', 'w')
newValueOrder = [x.replace(" ", "_") for x in valueOrder]
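# The quoting rules above could be factored into a small helper; a sketch
# (sql_literal is a hypothetical name, not part of the original script):
def sql_literal(value):
    """Quote a CSV field for use inside a SQL INSERT statement."""
    if value[0].isdigit():
        return value       # numbers pass through unquoted
    if value[0] == ' ':
        return 'NULL'      # space-padded fields become NULL
    value = value.replace("&", "and").replace("'", "''")
    return "'" + value + "'"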
from mpu import io as mi
import pdb

mainDict = mi.read('player_played_in_table.json')

finalList = []
header = ['Game_No', 'Player_id']
finalList.append(header)

pdb.set_trace()
for i in mainDict:
    temp = []
    for j in mainDict[i]:
        for k in j:
            temp.append([i, k])
    for j in temp:
        finalList.append(j)

mi.write('player_played_inTable.csv', finalList)
from mpu import io as mi

mainList = mi.read('TeamTable.csv')
f = open('outputChecker.txt', 'w')
count = 0

# Drop rows that are entirely empty: ['', '', '', '', '', '']
while ['', '', '', '', '', ''] in mainList:
    mainList.remove(['', '', '', '', '', ''])

"""
for i in mainList:
    print(i, file=f)
"""

valueOrder = mainList.pop(0)
newMainList = []

import pdb

for i in mainList:
    tempToJoin = []
    for j in i:
        tempToJoin.append("'" + j + "'")
    print(tempToJoin, file=f)
    # Take the first column back out of its quotes.
    tempToJoin[0] = tempToJoin[0].split("'")[1:2][0]
    newMainList.append(tempToJoin)

# " ".join(newMainList[48][2].split("'")).strip().replace(" ", " ")
tempMainList = []
for i in newMainList:
print("|||".join([ self.number, self.name, self.pos, self.year, self.height, self.weight, self.hometown, self.last_school ])) #get teams by conference year #setup the host that we are hitting parser = configparser.ConfigParser() parser.read("current_config.ini") method = "http:/" host = parser['host_information']['host'] #get the reference ID's ids = mi.read("team_ids.json") #this object will be modified throughout the script player_ids = {str(x): {} for x in range(2009, 2020)} id_to_increment = 10006195 def format_player_for_api_and_send(player): #we need the height in inches if not len(player.height) == 1: height = player.height feet, inches = height.split("-") total_height = int(feet) * 12 + int(inches) player.player_height_inches = total_height else: player.player_height_inches = "-"
def test_read_h5():
    source = pkg_resources.resource_filename("mpu", "io.py")
    with pytest.raises(NotImplementedError):
        read(source)
import mpu.io as mi

myDict = mi.read('gameTabelData.json')

listOfGames = []
listOfGames.append([
    'game_ID', 'Team one', 'Team two', 'Q1', 'Q2', 'Q3', 'Q4', 'OT',
    'Matchup', 'Game Type'
])

for i in myDict:
    temp = []
    temp.append(i)
    for j in myDict[i][1]:
        temp.append(j)
    for j in myDict[i][2]:
        temp.append(j)
    # Pad short rows so every game ends up with the same number of columns.
    if len(temp) != 8:
        temp.append(" ")
    if i == '400941816':
        temp.append(" ")
    temp.append(myDict[i][3])
    temp.append(myDict[i][4])
    listOfGames.append(temp)

f = open('output.txt', 'w')
mi.write('gameTable.csv', listOfGames)
import mpu.io as mi

teamList = mi.read('teamReceivingStats.csv')

while [] in teamList:
    teamList.remove([])

valueOrder = teamList.pop(0)
f = open('teamReceivingStats.sql', 'w')
newValueOrder = [x.replace("Avg", "Average") for x in valueOrder]

for i in teamList:
    print("INSERT INTO " + "teamReceivingStats(", ",".join(newValueOrder) + ")",
          file=f)
    print("VALUES (" + ','.join(i) + ");", file=f, end="\n\n")
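# For a header like ['Team', 'Rec', 'Avg'] (hypothetical column names), each
# row comes out of the loop above shaped like:
#
#   INSERT INTO teamReceivingStats( Team,Rec,Average)
#   VALUES (<col1>,<col2>,<col3>);
#
# Note the values are joined in raw, so text columns are not quoted here.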
import mpu.io as mi
import pdb

f = open('output.txt', 'w')
myDict = mi.read('finalPlayerStatsDict.json')

finalDict = {}
for i in myDict:
    tempDict = {}
    for j in myDict[i]:
        tempDictlast = {}
        for k in myDict[i][j]:
            # Key each stat row by its first header cell.
            tempDictlast[k[0][0]] = k[1:]
        tempDict[j] = tempDictlast
    finalDict[i] = tempDict

# mi.write('testDict(PossibleFinalForPlayerStats).json', finalDict)

pdb.set_trace()
# Count the data cells per player as a sanity check.
for i in finalDict:
    totalData = 0
    for j in finalDict[i]:
        for k in finalDict[i][j]:
            for m in finalDict[i][j][k]:
                totalData += len(m)
    print(totalData, file=f)
import mpu.io as mi

differences = []
checkingDict = mi.read('revisedTeamDict.json')
oldTeamDict = mi.read('teamDict.json')
rosterDict = mi.read('rosterDict.json')

rosterDict['Texas A&M'] = rosterDict.pop('Texas A&M')

# Rename roster keys to match the team-dict naming conventions.
for i in [
    'Connecticut', 'Florida International', 'Louisiana-Lafayette',
    'Louisiana-Monroe', 'Massachusetts', 'Miami (Florida)', 'Miami (Ohio)',
    'Mississippi', 'North Carolina State', 'UTSA'
]:
    if i == 'Connecticut':
        rosterDict['UConn'] = rosterDict.pop(i)
    elif i == 'Florida International':
        rosterDict['Florida Intl.'] = rosterDict.pop(i)
    elif i == 'Louisiana-Lafayette':
        rosterDict['Louisiana'] = rosterDict.pop(i)
    elif i == 'Louisiana-Monroe':
        rosterDict['Louisiana Monroe'] = rosterDict.pop(i)
    elif i == 'Massachusetts':
        rosterDict['UMass'] = rosterDict.pop(i)
    elif i == 'Miami (Florida)':
        rosterDict['Miami'] = rosterDict.pop(i)
    elif i == 'Miami (Ohio)':
        rosterDict['Miami (OH)'] = rosterDict.pop(i)
    elif i == 'Mississippi':
        rosterDict['Ole Miss'] = rosterDict.pop(i)
    elif i == 'North Carolina State':
        rosterDict['NC State'] = rosterDict.pop(i)
    elif i == 'UTSA':
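# The chain above is equivalent to a rename table; a sketch (the UTSA rename
# is omitted here, since its target name is not shown above):
RENAMES = {
    'Connecticut': 'UConn',
    'Florida International': 'Florida Intl.',
    'Louisiana-Lafayette': 'Louisiana',
    'Louisiana-Monroe': 'Louisiana Monroe',
    'Massachusetts': 'UMass',
    'Miami (Florida)': 'Miami',
    'Miami (Ohio)': 'Miami (OH)',
    'Mississippi': 'Ole Miss',
    'North Carolina State': 'NC State',
}
for old, new in RENAMES.items():
    rosterDict[new] = rosterDict.pop(old)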
import mpu.io as mi
import pdb

ppiCSV = mi.read('player_played_inTable.csv')
print("finished loading")

while [] in ppiCSV:
    ppiCSV.remove([])
print("finished removing")

f = open('insertForPPI.sql', 'w')
valueOrder = ppiCSV.pop(0)

for i in ppiCSV:
    print("INSERT INTO " + "playerPlayedIn(", ",".join(valueOrder) + ")", file=f)
    print("VALUES (" + ",".join(i) + ");", file=f, end="\n\n")
import mpu.io as mi

teamPlayedInCSV = mi.read('team_played_inTable.csv')

while [] in teamPlayedInCSV:
    teamPlayedInCSV.remove([])

valueOrder = teamPlayedInCSV.pop(0)
f = open('insertForTeamPlayedIn.sql', 'w')

for i in teamPlayedInCSV:
    print("INSERT INTO " + "TeamPlayedIn(" + ",".join(valueOrder), ")", file=f)
    print("VALUES(", ",".join(i), ");", file=f)
def read_json(fileName):
    import mpu.io as m
    return m.read(fileName + '.json')
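# A symmetric write helper would be a one-liner as well; a sketch (write_json
# is a hypothetical name; mpu.io.write dispatches on the .json extension the
# same way read does):
def write_json(fileName, data):
    import mpu.io as m
    m.write(fileName + '.json', data)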
import csv
import mpu.io as mi
import pdb

myDictionary = mi.read('mascotDict.json')

listOfTables = []
for i in myDictionary:
    if i != 'Liberty':
        listOfTables.append(myDictionary[i])

f = open('output.txt', 'w')

for i in listOfTables:
    # Iterate over a copy: removing items from the list being iterated
    # silently skips elements.
    for j in list(i):
        if j[0] == '[':
            # pdb.set_trace()
            i.remove(j)

# Column indices into each table row.
teamName = 0
mascot = 1
city = 2
state = 3
conference = 4
ID = -1

listToCSV = []
for i in listOfTables:
    temp = []
    temp.append(i[ID])
    temp.append(i[teamName])
# This program fixes the formatting of the player tables (inserts pseudo-ids).
import mpu.io as mi
import random

playerTableDict = mi.read('playerTableDict.json')
f = open('output.txt', 'w')

count = 10000000
for i in playerTableDict:
    try:
        for j in playerTableDict[i]:
            if len(j) == 9:
                j.insert(8, str(count))
                count += 1
    except:
        continue

mi.write('fixedPlayerTableDict.json', playerTableDict)

# Sanity check: print the length of every row after the insert.
for i in playerTableDict:
    try:
        for j in playerTableDict[i]:
            print(len(j))
    except:
        continue
        value = '0'
        self.data[hash_str] = value

    def set_table_name(self, name):
        self.table_name = sub(r'[^a-zA-Z]', '', name)
        self.url = ("http://" + api_host + "/statistics/"
                    + self.table_name[0].lower() + self.table_name[1:])

    def print_data(self):
        print(json.dumps(self.data, indent=4))


configParser = configparser.ConfigParser()
configParser.read("current_config.ini")
api_host = configParser["host_information"]['host']

teams_id = mio.read('team_ids.json')
player_ids = mio.read('player_ids.json')


def gather_stats_for_player(source, player_id, year_played):
    soup = BeautifulSoup(source, features='html.parser')
    captions = soup.find_all("caption")
    tables = soup.find_all("table", attrs={"class": "player-home"})

    # now we need to get the table headers inside of the tables
    table_headers = [[x.find_all("th"), x.find_all("td")] for x in tables]
    for group in table_headers:
        group[0] = [x.text for x in group[0]]
        group[1] = [x.text for x in group[1]]
import mpu.io as mi

participationDict = mi.read('TeamsAndID.json')
teamDict = mi.read('revisedTeamDict.json')

tableDict = {}
for i in participationDict:
    tempList = []
    for j in participationDict[i]:
        tempList.append(teamDict[participationDict[i][j]])
    tableDict[i] = tempList

mi.write('gameParticipationTableDictionary.json', tableDict)
        try:
            # Back off with a growing sleep until the host responds.
            if not requests.post(url):
                while not requests.post(url):
                    wait += 1
                    time.sleep(wait)
            source = ureq.urlopen(url).read()
            boolCheck = 0
        except:
            continue
    print(url)
    return source


# begins the script
dictInfo = mi.read('TeamsAndID.json')  # gets the imported dictionary

# iterating through the dictionary
gameTableData = {}
for i in dictInfo:
    # pdb.set_trace()
    source = getSource(i)
    try:
        WinnerData = GSD.getTeam(source, dictInfo[i]['Winner'])
    except:
        print("game", i, "fail")
    try:
        LoserData = GSD.getTeam(source, dictInfo[i]['Loser'])
    except:
        print("game", i, "fail")
    tempDict = {}
def test_read_hdf5():
    path = "files/example.hdf5"
    source = pkg_resources.resource_filename(__name__, path)
    with pytest.raises(NotImplementedError):
        read(source)
import pdb
import time

import mpu.io as mi
import scraper


def generateURLs(teamId, playerId):
    # e.g. http://www.cfbstats.com/2017/player/721/1081488/index.html
    urls = []
    first = "http://www.cfbstats.com/2017/player/"
    last = "/index.html"
    for i in playerId:
        mid = str(teamId) + "/" + str(i)
        urls.append(first + mid + last)
    return urls


rosterDict = mi.read('rosterDict.json')
teamDict = mi.read('teamDict.json')

newDict = {}
for i in teamDict:
    listOfIDs = []
    try:
        for j in rosterDict[i]['2017']:
            listOfIDs.append(j)
        newDict[teamDict[i]] = listOfIDs
    except:
        pass

# mi.write('teamAndPlayerIDS.json', newDict)
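# Usage (illustration only), matching the sample URL in the comment above:
urls = generateURLs(721, [1081488])
assert urls == ["http://www.cfbstats.com/2017/player/721/1081488/index.html"]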
import getSources
import mpu.io as mi
import getTeamNanes
import getBowlGame
import cancelChecker as CC
import getScore

dictionary = mi.read('gameIDS.json')
IDS = []
gameDataDict = {}

for i in dictionary:
    for j in dictionary[i]:
        source = getSources.GetSource(j)
        # check if cancelled
        if CC.canceled(source) == 1:
            bowlName = ""
            check = 0
            # check if bowl game
            if i == 'Bowls':
                bowlName = getBowlGame.getBowlGame(source)
                check = 1
            TeamNames = getTeamNanes.parseTeamNames(source)
            important = getScore.getScores(source, TeamNames, bowlName, j,
                                           check)
            print(important)
            gameDataDict[str(j)] = important
        else:
            continue

mi.write('gameTabelData.json', gameDataDict)