示例#1
0
def _parse_stage_row(row):
    """Extract the fields of one stage from a ``<tr>`` of the stages table.

    Walks the row's ``<td>`` cells and collects, when present, the keys
    ``date``, ``stage_icons``, ``name``, ``stage-link``, ``stage-winner``,
    ``stage-leader`` and ``km``.

    Returns a dict; it is empty when nothing was collected (for example a
    row that has no ``<td>`` cells), which the caller uses to skip the row.
    """
    data = {}
    images = []
    # Position counter that decides which field the next value belongs to:
    # 1 = date (first cell with plain text), then one step per <a> found:
    # 2 = stage name/link, 3 = winner, 4 = leader (or km), 5 = km.
    count = 1
    for td in row.findAll('td'):
        # Looking for the date: only the first cell with a single text child.
        if td.string and count == 1:
            data["date"] = td.string
            count += 1

        # Do we have icons? The list accumulates over all cells of the row.
        # NOTE(review): this key uses an underscore while every other key is
        # hyphenated; kept as-is because readers of the stored JSON are not
        # visible from here - confirm before renaming.
        imgs = td.findAll('img')
        if imgs:
            images.extend(img["src"] for img in imgs)
            data["stage_icons"] = images

        # Stage name, winner, leader and distance all come from <a> elements.
        for link in td.findAll('a'):
            text = link.string
            if count == 2:
                if text:
                    data["name"] = text
                    data["stage-link"] = "http://www.procyclingstats.com/" + link['href']
                else:
                    data["stage-link"] = ""
            elif count == 3:
                data["stage-winner"] = text if text else ""
            elif count == 4:
                if text:
                    # A numeric value in the leader position is treated as
                    # the distance, and the leader is recorded as empty.
                    try:
                        float(text)
                    except ValueError:
                        data["stage-leader"] = text
                    else:
                        data["km"] = text
                        data["stage-leader"] = ""
                else:
                    data["stage-leader"] = ""
            elif count == 5:
                data["km"] = text if text else ""

            # Every link advances the position, including ones seen while
            # count is still 1 or already past 5.
            count += 1
    return data


def generate_tour_data():
    """Scrape the Tour de France 2015 stage list and store one Stage per row.

    Fetches the procyclingstats stages page, parses the table with id
    ``list5`` and, for each row that yields data, numbers it sequentially
    (``data["stage"]``) and saves ``Stage(id=<number>, data=<JSON>)`` via
    ``put()``. Only the first 21 non-empty rows are stored.

    Returns None. If the table is not found in the page, an error is logged
    and nothing is stored. Errors from the fetch itself propagate to the
    caller.
    """
    import logging

    max_stages = 21  # rows numbered above this are counted but not stored

    urlfetch.set_default_fetch_deadline(45)
    page = urllib2.urlopen("http://www.procyclingstats.com/race/Tour_de_France_2015-stages")
    try:
        # BeautifulSoup consumes the response while building the tree, so
        # the connection can be released right after.
        soup = BeautifulSoup(page, "html.parser")
    finally:
        page.close()

    tabulka = soup.find("table", {"id": "list5"})
    if tabulka is None:
        # find() returns None when nothing matches; bail out explicitly
        # instead of failing on None.findAll below.
        logging.error("generate_tour_data: table with id 'list5' not found")
        return

    stage_count = 0
    for row in tabulka.findAll('tr'):
        data = _parse_stage_row(row)
        if not data:
            continue
        stage_count += 1
        data["stage"] = str(stage_count)
        if stage_count <= max_stages:
            Stage(id=stage_count, data=json.dumps(data)).put()