Example #1
def extractTVUData(year, pathToData, categories, saveCategoryBestList):
    allAthleteIDs = {}
    for categoryName in categories:
        print("Starting With Category: " + categoryName.upper())
        categoryId = categories[categoryName]
        filename = pathToData + "data/{}/athleteIds/" + categoryName + "{}.csv"
        athleteIds = findpersonIDs(year, categoryId, categoryName, filename.format(year, year), saveCategoryBestList)
        
        # merge this category's IDs into the overall dict (might be removed)
        for x, y in athleteIds.items():
            allAthleteIDs[x] = y
         
        if not saveCategoryBestList:     
            for athleteName in athleteIds:
                print("Starting With Athlete: " + athleteName.upper())
                athleteId = athleteIds[athleteName]
                fileNamePerson = pathToData + "data/{}/resultsByAthlete/" + athleteName + "{}.csv"
                folder = year
                suffix = year
                if "TV Unterseen" in athleteName:
                    if athleteName[-2:-1] == "T":
                        suffix = "{}".format(year)[-2:]
                getPersonBestList(year, athleteId, fileNamePerson.format(folder, suffix))
                
    allathletes = [[k, v] for k, v in allAthleteIDs.items()]
    exportCSV(allathletes, pathToData + "data/{}/allAthleteIds{}.csv".format(year, year))
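The function above could be driven like this; a minimal sketch, assuming the helpers defined elsewhere in this module are importable and that the category codes in the dictionary are placeholders rather than verified Swiss Athletics identifiers:

# Hypothetical driver; the category codes and the data path are assumptions.
if __name__ == "__main__":
    categories = {"Maenner": "M", "Frauen": "W"}  # placeholder category codes
    extractTVUData(2019, "./", categories, saveCategoryBestList=False)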
Example #2
import datetime


def findAthletesToInsert(resultsFile, urlAthleteCheck):
    tvuID = "1.BE.0159"
    table = []
    with open(resultsFile, mode="r") as f:
        for line in f:
            table.append(line.split(";"))

    firstNameId = table[0].index("FirstName")
    lastNameId = table[0].index("LastName")
    birthYearId = table[0].index("Yob")
    clubCodeId = table[0].index("ClubCode")
    uniqueAthletes = set()
    existingAthletes = set()
    # skip the header row; the slice caps processing at the first 9,999 data rows
    for row in table[1:10000]:
        if row[clubCodeId] == tvuID:
            fullName = row[firstNameId] + " " + row[lastNameId]
            birthYear = row[birthYearId]
            key = (fullName, birthYear)
            if key not in uniqueAthletes and key not in existingAthletes:
                if not checkAthleteExists(urlAthleteCheck, fullName, birthYear):
                    uniqueAthletes.add(key)
                else:
                    existingAthletes.add(key)
    exportCSV(
        uniqueAthletes, "notExistingAthletes{}.csv".format(
            datetime.datetime.now().strftime("%Y%m%d%H%M")))
    if len(uniqueAthletes) == 0:
        print("\n\nReady For Insertation. All Athletes are in the DB\n\n")
def clearLicenses(rawFile, clearedFile):
    table = []
    with open(rawFile, mode="r", encoding="utf-8") as f:
        for line in f:
            table.append(line.split(";"))

    unique_data = [list(x) for x in set(tuple(x) for x in table)]
    exportCSV(unique_data, clearedFile)
    return unique_data
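findAthletesToInsert reads a semicolon-separated result export, keeps only rows with the TV Unterseen club code 1.BE.0159, and asks an HTTP endpoint whether each (name, year of birth) pair already exists; clearLicenses simply de-duplicates a raw licence file. A minimal sketch of calling both, with the file names and the check URL as assumptions:

# Hypothetical file names and endpoint URL.
if __name__ == "__main__":
    checkUrl = "http://localhost/statistik_tool_tvu/tvustat/public/checkAthleteExists.php"
    findAthletesToInsert("seasonResults.csv", checkUrl)
    clearLicenses("rawLicencesTVU.csv", "clearedLicencesTVU.csv")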
Example #4
import requests
from bs4 import BeautifulSoup


def findpersonIDs(year, blcat, catName, exportfile, saveCategoryBestList):
    # Set headers
    headers = requests.utils.default_headers()
    headers.update({
        'User-Agent':
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
    })

    data = {
        "mobile": "false",
        "blyear": year,
        "acc": "ACC_1.BE.0159",
        "blcat": blcat,
        "disci": "DALL",
        "top": "30"
    }

    urlFrame = "https://alabus.swiss-athletics.ch/satweb/faces/bestlistclub.xhtml?"

    req = requests.post(urlFrame, data=data, headers=headers)
    doc = BeautifulSoup(req.text, 'html.parser')

    if saveCategoryBestList:
        parseCategoryFromDoc(doc, exportfile[:-4] + "allResults.csv")

    row_tags = doc.find_all('tr')

    teamCounter = 0
    ids = {}
    for row in row_tags:
        columns = row.find_all('td')
        for column in columns:
            links = column.find_all('a')

            for link in links:
                onclick = link.get("onclick")
                # guard against links without an onclick attribute (link.get returns None)
                if onclick and "einzelner-athlet-bestenliste-neu" in onclick:
                    athlet = link.text.strip()
                    idPositionStart = onclick.find(
                        "con=") + 4  # not including the con=
                    idPositionEnd = onclick.find("&", idPositionStart, -1)
                    athleteId = onclick[idPositionStart:idPositionEnd]
                    if "TV Unterseen" in athlet:
                        teamCounter += 1
                        suffix = ""
                        if teamCounter > 1:
                            suffix = "T{}".format(teamCounter)
                        athlet = "TV Unterseen " + catName + suffix
                    ids[athlet] = athleteId

    table = [["athleteName", "swissAthleticsDBId"]]
    for name in ids:
        table.append([name, ids[name]])

    exportCSV(table, exportfile)
    return ids
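A direct call could look like this; a sketch assuming "M" is a valid blcat value, "Maenner" an example category name, and that the export folder exists:

# Hypothetical category code, name, and export path.
ids = findpersonIDs(2019, "M", "Maenner", "data/2019/athleteIds/Maenner2019.csv",
                    saveCategoryBestList=False)
print("Found {} athletes".format(len(ids)))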
Example #5
def parseCategoryFromDoc(doc, exportfile):
    row_tags = doc.find_all('tr')
    disz_tags = doc.find_all('h3')
    ndis = 0  # starts at 0 because there are no other h3 tags before the disciplines

    table = []

    for row in row_tags:
        columns = row.find_all('td')

        if row.text.strip()[:2] == "Nr":
            # header row of a new discipline table: record the discipline name
            table.append([disz_tags[ndis].text.strip()])
            ndis = ndis + 1
            columns = row.find_all('th')

        table_row = []
        tooltip = False
        for column in columns:
            if "Tooltip" in str(column):
                valueIdentifier = ':resultValue">'
                value = findbyIdentifiers(str(column), valueIdentifier,
                                          '</span>')
                tooltipIdentifier = '<div class="ui-tooltip-text ui-shadow ui-corner-all">'
                tooltip = findbyIdentifiers(str(column), tooltipIdentifier,
                                            '</div>')
                table_row.append(value)
            else:
                table_row.append(column.text.strip().replace("*", ""))

        if row.text.strip()[:2] == "Nr":
            table_row.append("Tooltip")
        if tooltip != False:
            table_row.append(tooltip)
        else:
            table_row.append("")
        table.append(table_row)

    exportCSV(table, exportfile)
import xml.dom.minidom


def fromAlabustoTxt(alabusFile, txtFile):
    doc = xml.dom.minidom.parse(alabusFile)

    athletes = doc.getElementsByTagName("athlete")

    table = []

    for athlete in athletes:
        licenseSA = athlete.getAttribute("license")
        lastName = getText(athlete, "lastName")
        firstName = getText(athlete, "firstName")
        birthDate = getText(athlete, "birthDate")
        accountCode = getText(athlete, "accountCode")
        if accountCode is not None and birthDate is not None:
            clubID = accountCode
            if clubID == "1.BE.0159":
                table.append(
                    [licenseSA, lastName, firstName, birthDate, clubID])
        else:
            print("******************************************************" +
                  firstName + " " + lastName)

    exportCSV(table, txtFile)
    print("end")
Example #7
import requests
from bs4 import BeautifulSoup


def getPersonBestList(year, personID, exportFile):
    # Set headers
    headers = requests.utils.default_headers()
    headers.update({
        'User-Agent':
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
    })

    data = {"mobile": "false",
            "blyear": year,
            "con": personID,
            "blcat": "W",
            "disci": "DALL",
            "top": "30",
            "srb": "0"}

    urlFrame = "https://alabus.swiss-athletics.ch/satweb/faces/bestlistathlete.xhtml?"

    req = requests.post(urlFrame, data=data, headers=headers)
    doc = BeautifulSoup(req.text, 'html.parser')

    disz_tags = doc.find_all('h3')
    onlyDisziplin = ""
    spans = doc.find_all('span')

    # one h3 if the athlete has only a single discipline, zero for teams
    # (team pages carry no birth date heading)
    if len(disz_tags) <= 1:
        for span in spans:
            if "Disziplin" in str(span):
                selects = span.find_all("select")
                for select in selects:
                    options = select.find_all("option")
                    for option in options:
                        disciplinId = option.get("value")
                        disName = option.text.strip()
                        onlyDisziplin = disName
                        data["disci"] = disciplinId
                        req = requests.post(urlFrame, data=data, headers=headers)
                        doc = BeautifulSoup(req.text, 'html.parser')

    ndis = 0  # index into disz_tags; starts at 0
    row_tags = doc.find_all('tr')

    table = []
    if len(disz_tags) > 0:  # individual athletes; team pages have no h3 tags
        birthYear = disz_tags[ndis].get_text()
        table.append(["Geb. Dat.", birthYear])
        ndis += 1  # the first h3 is the birth date (Geburtsdatum)

    for row in row_tags:
        columns = row.find_all('td')

        if row.text.strip()[:2] == "Nr":
            disziplinName = disz_tags[ndis].text.strip() if len(disz_tags) >= 2 else onlyDisziplin
            table.append([disziplinName, ""])
            ndis = ndis + 1
            columns = row.find_all('th')

        table_row = []
        tooltip = False
        for column in columns:
            if "Tooltip" in str(column):
                valueIdentifier = ':resultValue">'
                value = findbyIdentifiers(str(column), valueIdentifier, '</span>')
                tooltipIdentifier = '<div class="ui-tooltip-text ui-shadow ui-corner-all">'
                tooltip = findbyIdentifiers(str(column), tooltipIdentifier, '</div>')
                table_row.append(value)
            else:
                if "Es sind keine Daten vorhanden" not in row.text.strip():
                    table_row.append(column.text.strip().replace("*", ""))

        if row.text.strip()[:2] == "Nr":
            table_row.append("Tooltip")
        if tooltip != False:
            table_row.append(tooltip)
        else:
            table_row.append("")
        table.append(table_row)
        
    exportCSV(table, exportFile)
    return table

# The code below followed the return statement of getPersonBestList and used a
# rawExportFile name that is not defined there; it is kept as a separate
# routine under an assumed name and signature.
def updateLicensesFromExport(rawExportFile):
    clearedLicensesTVU = "clearedLicencesTVU.csv"
    unique_data = clearLicenses(rawExportFile, clearedLicensesTVU)

    urlLocal = "http://localhost/statistik_tool_tvu/tvustat/public/updateAthleteLicence.php"
    urlWeb = "http://tvulive.bplaced.net/tvustat/public/updateAthleteLicence.php"

    changed = []
    notChanged = []

    i = 0
    for data in unique_data:
        if i > 10000:  # hard cap on the number of processed rows
            i = i + 1
        else:
            print(data)
            i = i + 1
            firstName = data[2]
            lastName = data[1]
            birthDate = data[3]
            licenseSA = data[0]
            result = updateAthleteLicense(firstName, lastName, birthDate,
                                          licenseSA, urlLocal)
            change = [firstName, lastName, birthDate, licenseSA]
            if result:
                changed.append(change)
            else:
                notChanged.append(change)

    exportCSV(changed, "changedPeople.csv")
    exportCSV(notChanged, "NOTchangedPeople.csv")
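getPersonBestList fetches a single athlete's best list from the Swiss Athletics site, and the licence-update routine above posts licence numbers to the updateAthleteLicence.php endpoint. A closing sketch, with the athlete ID and file names as placeholders and updateLicensesFromExport being the wrapper name assumed above:

# Hypothetical athlete ID and file names.
getPersonBestList(2019, "123456", "data/2019/resultsByAthlete/Jane Doe2019.csv")
updateLicensesFromExport("rawExportTVU.csv")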