def generate_tour_data():
    """Scrape the 2015 Tour de France stage table from procyclingstats.com
    and persist each stage as a ``Stage`` datastore entity.

    Side effects: one outbound HTTP fetch and up to 21 ``Stage.put()``
    writes (keyed by 1-based stage number, JSON-encoded payload).
    Returns None.
    """
    # Scraping + datastore writes can exceed the default deadline.
    urlfetch.set_default_fetch_deadline(45)

    # Column order of the scraped table, kept for reference.
    # NOTE(review): the icons are stored under "stage_icons" below, not
    # "stage-icons" as listed here -- confirm which spelling consumers of
    # the stored JSON actually expect before unifying.
    data_order = ["date", "stage-icons", "stage-link", "name",
                  "stage-winner", "stage-leader", "km"]

    page = urllib2.urlopen(
        "http://www.procyclingstats.com/race/Tour_de_France_2015-stages")
    try:
        soup = BeautifulSoup(page, "html.parser")
    finally:
        # fix: the HTTP response object was never closed (resource leak).
        page.close()

    tabulka = soup.find("table", {"id": "list5"})

    stage_count = 0
    for row in tabulka.findAll('tr'):
        col = row.findAll('td')
        data = {}
        images = []
        count = 1  # 1-based position of the next expected data column
        for td in col:
            # Column 1: the stage date (plain text cell).
            if td.string:
                if count == 1:
                    data["date"] = td.string
                    count += 1
            # Any cell may carry stage-type icons.
            imgs = td.findAll('img')
            if imgs:
                for img in imgs:
                    images.append(img["src"])
                data["stage_icons"] = images
            # Linked cells: stage name, winner, leader, distance.
            links = td.findAll('a')
            for link in links:
                if count == 2:
                    if link.string:
                        data["name"] = link.string
                        data["stage-link"] = ("http://www.procyclingstats.com/"
                                              + link['href'])
                    else:
                        data["stage-link"] = ""
                if count == 3:
                    if link.string:
                        data["stage-winner"] = link.string
                    else:
                        data["stage-winner"] = ""
                if count == 4:
                    # Rows without a leader link shift the distance column
                    # one position left: a numeric value here is the km
                    # figure, not a rider name.
                    if link.string:
                        try:
                            float(link.string)  # validation only
                            data["km"] = link.string
                            data["stage-leader"] = ""
                        except ValueError:
                            data["stage-leader"] = link.string
                    else:
                        data["stage-leader"] = ""
                if count == 5:
                    if link.string:
                        data["km"] = link.string
                    else:
                        data["km"] = ""
                count += 1
        if data:
            stage_count += 1
            data["stage"] = str(stage_count)
            # Persist only the 21 real stages; later rows are footer noise.
            if stage_count < 22:
                stage = Stage(id=stage_count, data=json.dumps(data))
                stage.put()