Example #1
# Python 2 / Google App Engine. Imports the snippet relies on:
import json
import urllib2

from bs4 import BeautifulSoup
from google.appengine.api import urlfetch

# Stage is an ndb model defined elsewhere in the app; the import path is assumed here.
from models import Stage


def stage_detail_parse(stage_number, url):
    data = {}
    urlfetch.set_default_fetch_deadline(45)
    images_json = []
    data_order = ["day", "month", "avg-speed", "cat", "start-finish"]
    page = urllib2.urlopen(url)
    soup = BeautifulSoup(page, "html.parser")
    tabulka = soup.find("h3", {"class": "section"})
    div = tabulka.parent
    images = soup.find_all('img')
    # Collect the stage images and the stage icon from every <img> on the page.
    for image in images:
        if "Stage" in image["src"]:
            images_json.append(image["src"])
        if "Final_GC" in image["src"]:
            images_json.append(image["src"])
        if "site-icons" in image["src"]:
            data['stage-icon'] = image["src"]

    data['stage-images'] = images_json

    # The interesting values carry no tag of their own, so walk the children of
    # the heading's parent and pick up bare strings in the order given by data_order.
    cont = 0
    for element in div:
        if cont < len(data_order):
            if element.name is None and "\n" not in element.string and element.string != " " and "Tag for network 919" not in element.string:
                data[data_order[cont]] = element.string
                cont += 1
    print stage_number
    # Merge the newly scraped fields into the JSON blob stored on the Stage entity.
    stage = Stage.get_by_id(int(stage_number))
    stage_data = json.loads(stage.data)
    stage_data.update(data)
    stage.data = json.dumps(stage_data)
    stage.put()
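
The function assumes a Stage datastore entity already exists for stage_number and that its data property holds a JSON string. A minimal sketch of what that model and a call site might look like, assuming ndb and a made-up URL (none of these names are taken from the original post):

from google.appengine.ext import ndb
import json

class Stage(ndb.Model):
    # JSON-encoded dict that stage_detail_parse() reads, updates, and writes back
    data = ndb.TextProperty()

# Seed an entity so json.loads(stage.data) has something to parse,
# then scrape the (hypothetical) stage page into it.
Stage(id=1, data=json.dumps({})).put()
stage_detail_parse(1, "http://www.example.com/race/stage-1")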