Example #1
import codecs
import requests

import dumper  # project module that parses the goodgame.ru listing pages

# RESULT_FILE and grabDataFromPage are defined elsewhere in the same module.
def main():
    startUrl = "http://goodgame.ru/video/"
    page = 'http://goodgame.ru/video/page/%s/'
    # Read the first listing page to find out how many pages exist in total.
    maxPages = dumper.extractMaxPages(requests.get(startUrl).text)
    crawledData = {}

    for i in range(1, int(maxPages) + 1):
        print("Grabbing page %s" % i)
        grabDataFromPage(page % i, crawledData)

    f = codecs.open(RESULT_FILE, "w+", "UTF-8")
    f.write('<head><meta charset="UTF-8"></head>')

    # Index: one anchor link per streamer with the number of recorded streams.
    streamers = crawledData.keys()
    for s in sorted(streamers):
        count = len(crawledData[s]["streams"])
        f.write("<a href='#%s'>%s %s</a><br>" % (s, s, count))

    # One section per streamer listing every crawled stream.
    for k in streamers:
        f.write("<a name='%s'></a>" % k)
        f.write("<h1>%s</h1>" % k)
        for stream in crawledData[k]["streams"]:
            f.write("<br>")
            f.write("<a href='%s' target='_blank'>%s</a>" % (stream["url"], stream["title"]))

    f.close()
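Example #1 assumes that grabDataFromPage fills crawledData with one entry per streamer, each holding a "streams" list of dicts with a "url" and a "title". The helper itself is not shown; a minimal sketch of what it might look like follows, where the CSS selectors are purely illustrative assumptions about the listing markup, not the real goodgame.ru page structure:

import requests
from bs4 import BeautifulSoup

def grabDataFromPage(url, crawledData):
    """Hypothetical sketch: collect streams from one listing page into crawledData."""
    soup = BeautifulSoup(requests.get(url).text, "html.parser")
    # The selectors below are assumptions made for illustration only.
    for item in soup.select(".video-item"):
        streamer = item.select_one(".streamer-name").get_text(strip=True)
        link = item.select_one("a")
        entry = crawledData.setdefault(streamer, {"streams": []})
        entry["streams"].append({
            "url": link["href"],
            "title": link.get_text(strip=True),
        })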
Example #2
def test_extractMaxPages():
    """extractMaxPages should return the total page count advertised on the first listing page."""
    from dumper import extractMaxPages
    # fileAsString is a test helper (defined elsewhere) that returns a saved HTML fixture as text.
    data = fileAsString("video.html")
    maxPagesActual = extractMaxPages(data)
    maxPagesExpected = 147
    assert maxPagesExpected == maxPagesActual
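The test only checks the returned count, so how extractMaxPages parses the page is not visible here. One plausible implementation, assuming pagination links follow the /video/page/N/ pattern used in Example #1 (an assumption, not the project's actual code), is a simple regex scan:

import re

def extractMaxPages(html):
    """Hypothetical sketch: return the highest page number linked from the listing page."""
    # Assumes pagination links of the form /video/page/<N>/ as in Example #1.
    pages = [int(n) for n in re.findall(r"/video/page/(\d+)/", html)]
    return max(pages) if pages else 1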