import datetime
import xml.dom.minidom

import requests
from bs4 import BeautifulSoup


def extractTVUData(year, pathToData, categories, saveCategoryBestList):
    """Collect the athlete IDs of every category, then download each athlete's best list."""
    allAthleteIDs = {}
    for categoryName in categories:
        print("Starting with category: " + categoryName.upper())
        categoryId = categories[categoryName]
        filename = pathToData + "data/{}/athleteIds/" + categoryName + "{}.csv"
        athleteIds = findpersonIDs(year, categoryId, categoryName,
                                   filename.format(year, year), saveCategoryBestList)
        allAthleteIDs.update(athleteIds)  # might be removed
        if not saveCategoryBestList:
            for athleteName in athleteIds:
                print("Starting with athlete: " + athleteName.upper())
                athleteId = athleteIds[athleteName]
                fileNamePerson = pathToData + "data/{}/resultsByAthlete/" + athleteName + "{}.csv"
                folder = year
                suffix = year
                # Additional relay teams are keyed "TV Unterseen <cat> T<n>";
                # they get the two-digit year as file suffix instead.
                if "TV Unterseen" in athleteName and athleteName[-2:-1] == "T":
                    suffix = "{}".format(year)[-2:]
                getPersonBestList(year, athleteId, fileNamePerson.format(folder, suffix))
    allathletes = [[k, v] for k, v in allAthleteIDs.items()]
    exportCSV(allathletes, pathToData + "data/{}/allAthleteIds{}.csv".format(year, year))
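# Hypothetical invocation (the category names and alabus category codes below
# are made up for illustration; the real codes come from the bestlist form):
#
#   categories = {"maenner": "M", "frauen": "W"}
#   extractTVUData(2019, "./", categories, saveCategoryBestList=False)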
def findAthletesToInsert(resultsFile, urlAthleteCheck):
    """Compare a results export against the DB and list the athletes still missing."""
    tvuID = "1.BE.0159"  # Swiss Athletics club code of TV Unterseen
    table = []
    with open(resultsFile, mode="r") as f:
        for line in f:
            table.append(line.strip().split(";"))
    firstNameId = table[0].index("FirstName")
    lastNameId = table[0].index("LastName")
    birthYearId = table[0].index("Yob")
    clubCodeId = table[0].index("ClubCode")
    uniqueAthletes = set()
    existingAthletes = set()
    for l in table[1:]:  # skip the header row
        if l[clubCodeId] == tvuID:
            fullName = l[firstNameId] + " " + l[lastNameId]
            birthYear = l[birthYearId]
            key = (fullName, birthYear)
            if key not in uniqueAthletes and key not in existingAthletes:
                if not checkAthleteExists(urlAthleteCheck, fullName, birthYear):
                    uniqueAthletes.add(key)
                else:
                    existingAthletes.add(key)
    exportCSV(uniqueAthletes,
              "notExistingAthletes{}.csv".format(
                  datetime.datetime.now().strftime("%Y%m%d%H%M")))
    if len(uniqueAthletes) == 0:
        print("\n\nReady for insertion. All athletes are in the DB.\n\n")
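# checkAthleteExists is a project helper defined elsewhere. A minimal sketch,
# assuming the PHP endpoint takes the name and year of birth as POST fields
# and answers "true"/"false" in the body (the field names and response format
# are guesses, not the project's confirmed API):
def checkAthleteExists(url, fullName, birthYear):
    response = requests.post(url, data={"name": fullName, "yob": birthYear})
    return "true" in response.text.lower()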
def clearLicenses(rawFile, clearedFile):
    table = []
    with open(rawFile, mode="r", encoding="utf-8") as f:
        for line in f:
            table.append(line.split(";"))
    unique_data = [list(x) for x in set(tuple(x) for x in table)]
    exportCSV(unique_data, clearedFile)
    return unique_data
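# exportCSV is used throughout this module but defined elsewhere in the
# project. A minimal sketch of a compatible implementation, assuming
# semicolon-separated output to match the split(";") readers above:
def exportCSV(rows, filename):
    with open(filename, mode="w", encoding="utf-8") as f:
        for row in rows:
            f.write(";".join(str(cell) for cell in row) + "\n")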
def findpersonIDs(year, blcat, catName, exportfile, saveCategoryBestList):
    # Mimic a browser; alabus may reject the default requests user agent.
    headers = requests.utils.default_headers()
    headers.update({
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
    })
    data = {
        "mobile": "false",
        "blyear": year,
        "acc": "ACC_1.BE.0159",
        "blcat": blcat,
        "disci": "DALL",
        "top": "30"
    }
    urlFrame = "https://alabus.swiss-athletics.ch/satweb/faces/bestlistclub.xhtml?"
    req = requests.post(urlFrame, data=data, headers=headers)
    doc = BeautifulSoup(req.text, 'html.parser')
    if saveCategoryBestList:
        parseCategoryFromDoc(doc, exportfile[:-4] + "allResults.csv")
    row_tags = doc.find_all('tr')
    teamCounter = 0
    ids = {}
    for row in row_tags:
        for column in row.find_all('td'):
            for link in column.find_all('a'):
                onclick = link.get("onclick")
                if onclick and "einzelner-athlet-bestenliste-neu" in onclick:
                    athlet = link.text.strip()
                    idPositionStart = onclick.find("con=") + 4  # skip past "con="
                    idPositionEnd = onclick.find("&", idPositionStart, -1)
                    athleteId = onclick[idPositionStart:idPositionEnd]
                    # Relay teams all appear as "TV Unterseen"; number them so
                    # each team gets a distinct dictionary key.
                    if "TV Unterseen" in athlet:
                        teamCounter += 1
                        suffix = "T{}".format(teamCounter) if teamCounter > 1 else ""
                        athlet = "TV Unterseen " + catName + suffix
                    ids[athlet] = athleteId
    table = [["athleteName", "swissAthleticsDBId"]]
    for name in ids:
        table.append([name, ids[name]])
    exportCSV(table, exportfile)
    return ids
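# Worked example of the onclick parsing above (illustrative string, not real
# data): given
#   onclick = "...einzelner-athlet-bestenliste-neu...con=12345&foo=bar..."
# idPositionStart points just behind "con=", idPositionEnd at the following
# "&", so athleteId == "12345".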
def parseCategoryFromDoc(doc, exportfile):
    """Parse a category best list page into a CSV table, one block per discipline."""
    row_tags = doc.find_all('tr')
    disz_tags = doc.find_all('h3')
    ndis = 0  # starts at 0 because there are no other h3 tags on the page
    table = []
    for row in row_tags:
        columns = row.find_all('td')
        if row.text.strip()[:2] == "Nr":
            # A header row starts a new discipline block.
            table.append([disz_tags[ndis].text.strip()])
            ndis = ndis + 1
            columns = row.find_all('th')
        table_row = []
        tooltip = False
        for column in columns:
            if "Tooltip" in str(column):
                value = findbyIdentifiers(str(column), ':resultValue">', '</span>')
                tooltip = findbyIdentifiers(
                    str(column),
                    '<div class="ui-tooltip-text ui-shadow ui-corner-all">',
                    '</div>')
                table_row.append(value)
            else:
                table_row.append(column.text.strip().replace("*", ""))
        # Header rows get the literal column name; data rows the tooltip text
        # (or an empty cell), keeping the column counts aligned.
        if row.text.strip()[:2] == "Nr":
            table_row.append("Tooltip")
        elif tooltip != False:
            table_row.append(tooltip)
        else:
            table_row.append("")
        table.append(table_row)
    exportCSV(table, exportfile)
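# findbyIdentifiers is defined elsewhere in the project; from its call sites
# it cuts the text between a start and an end marker. A sketch, assuming it
# returns False when a marker is absent (the callers test "tooltip != False"):
def findbyIdentifiers(text, startIdentifier, endIdentifier):
    start = text.find(startIdentifier)
    if start == -1:
        return False
    start += len(startIdentifier)
    end = text.find(endIdentifier, start)
    if end == -1:
        return False
    return text[start:end]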
def fromAlabustoTxt(alabusFile, txtFile):
    """Extract the TV Unterseen athletes from an alabus XML export into a CSV table."""
    doc = xml.dom.minidom.parse(alabusFile)
    athletes = doc.getElementsByTagName("athlete")
    table = []
    for athlete in athletes:
        licenseSA = athlete.getAttribute("license")
        lastName = getText(athlete, "lastName")
        firstName = getText(athlete, "firstName")
        birthDate = getText(athlete, "birthDate")
        accountCode = getText(athlete, "accountCode")
        if accountCode is not None and birthDate is not None:
            clubID = accountCode
            if clubID == "1.BE.0159":
                table.append([licenseSA, lastName, firstName, birthDate, clubID])
        else:
            # Flag records that are missing the account code or birth date.
            print("******************************************************"
                  + firstName + " " + lastName)
    exportCSV(table, txtFile)
    print("end")
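# getText is a project helper for xml.dom.minidom nodes. A sketch, assuming
# it returns the text content of the first child element with the given tag
# name, or None when the tag is missing (the caller above checks for None):
def getText(node, tagName):
    elements = node.getElementsByTagName(tagName)
    if not elements or elements[0].firstChild is None:
        return None
    return elements[0].firstChild.nodeValue.strip()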
def getPersonBestList(year, personID, exportFile):
    # Mimic a browser; alabus may reject the default requests user agent.
    headers = requests.utils.default_headers()
    headers.update({
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
    })
    data = {
        "mobile": "false",
        "blyear": year,
        "con": personID,
        "blcat": "W",
        "disci": "DALL",
        "top": "30",
        "srb": "0"
    }
    urlFrame = "https://alabus.swiss-athletics.ch/satweb/faces/bestlistathlete.xhtml?"
    req = requests.post(urlFrame, data=data, headers=headers)
    doc = BeautifulSoup(req.text, 'html.parser')
    disz_tags = doc.find_all('h3')
    onlyDisziplin = ""
    spans = doc.find_all('span')
    # One h3 means the athlete has only a single discipline; zero means a team,
    # since teams have no birth-date heading. In both cases the discipline name
    # must be read from the select box and the page requested again.
    if len(disz_tags) == 1 or len(disz_tags) == 0:
        for span in spans:
            if "Disziplin" in str(span):
                for select in span.find_all("select"):
                    for option in select.find_all("option"):
                        disciplinId = option.get("value")
                        onlyDisziplin = option.text.strip()
                        data["disci"] = disciplinId
        req = requests.post(urlFrame, data=data, headers=headers)
        doc = BeautifulSoup(req.text, 'html.parser')
    ndis = 0  # starts at 0
    row_tags = doc.find_all('tr')
    table = []
    if len(disz_tags) > 0:
        # For athletes the first h3 holds the birth date; teams have none.
        birthYear = disz_tags[ndis].get_text()
        table.append(["Geb. Dat.", birthYear])
        ndis += 1  # because the first h3 is the birth date
    for row in row_tags:
        columns = row.find_all('td')
        if row.text.strip()[:2] == "Nr":
            disziplinName = disz_tags[ndis].text.strip() if len(disz_tags) >= 2 else onlyDisziplin
            table.append([disziplinName, ""])
            ndis = ndis + 1
            columns = row.find_all('th')
        table_row = []
        tooltip = False
        for column in columns:
            if "Tooltip" in str(column):
                value = findbyIdentifiers(str(column), ':resultValue">', '</span>')
                tooltip = findbyIdentifiers(
                    str(column),
                    '<div class="ui-tooltip-text ui-shadow ui-corner-all">',
                    '</div>')
                table_row.append(value)
            elif "Es sind keine Daten vorhanden" not in str(row.text.strip()):
                # Skip the "no data available" placeholder row.
                table_row.append(column.text.strip().replace("*", ""))
        # Header rows get the literal column name; data rows the tooltip text
        # (or an empty cell), keeping the column counts aligned.
        if row.text.strip()[:2] == "Nr":
            table_row.append("Tooltip")
        elif tooltip != False:
            table_row.append(tooltip)
        else:
            table_row.append("")
        table.append(table_row)
    exportCSV(table, exportFile)
    return table
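# updateAthleteLicense is used by the driver script below but defined
# elsewhere; it is expected to return a truthy value when the licence was
# written. A sketch, with hypothetical POST field names for the PHP endpoint:
def updateAthleteLicense(firstName, lastName, birthDate, licenseSA, url):
    response = requests.post(url, data={
        "firstname": firstName,  # all field names here are assumptions
        "lastname": lastName,
        "birthdate": birthDate,
        "license": licenseSA,
    })
    return response.ok and "true" in response.text.lower()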
clearedLicensesTVU = "clearedLicencesTVU.csv"
unique_data = clearLicenses(rawExportFile, clearedLicensesTVU)
urlLocal = "http://localhost/statistik_tool_tvu/tvustat/public/updateAthleteLicence.php"
# Live endpoint; swap in for urlLocal to update the production site.
urlWeb = "http://tvulive.bplaced.net/tvustat/public/updateAthleteLicence.php"
changed = []
notChanged = []
for data in unique_data:
    print(data)
    # Column order matches the table written by fromAlabustoTxt:
    # licenseSA, lastName, firstName, birthDate, clubID.
    firstName = data[2]
    lastName = data[1]
    birthDate = data[3]
    licenseSA = data[0]
    result = updateAthleteLicense(firstName, lastName, birthDate, licenseSA, urlLocal)
    change = [firstName, lastName, birthDate, licenseSA]
    if result:
        changed.append(change)
    else:
        notChanged.append(change)
exportCSV(changed, "changedPeople.csv")
exportCSV(notChanged, "NOTchangedPeople.csv")