Exemplo n.º 1
0
def crawDailyStockComments(link,webNet):
    """Build one record per ``<li>`` entry of the ``ul-news-list`` block.

    The list markup is obtained through ``HGStockNetSpiderUtils`` and then
    consumed one ``<li>...</li>`` item at a time. For every item the link,
    title and bracketed date are extracted, and the article body is obtained
    by calling ``crawDailyDescriptContext`` on the item's link.

    Args:
        link: Forwarded to ``HGStockNetSpiderUtils.returnStartContext``;
            presumably the URL of the listing page (confirm against the
            helper).
        webNet: String prepended to each item's ``href`` value to form the
            article link.

    Returns:
        A list with one 7-element list per item:
        ``[uuid1 string, linkUrl, title, pubDate, descriptContext,
        'STOCK', 'HGNET']``. The list is empty when no ``<li>`` is found.
    """
    utils = HGStockNetSpiderUtils

    listContext = utils.returnStartContext(link,'<ul class="ul-news-list">')
    listContext = utils.filterContextByTarget(listContext,'<ul','</ul>')
    # Used as the argument to range(), so this is the number of items to read.
    itemCount = utils.findAllTarget(listContext,'<li>')

    # The text between '[' and ']' is prefixed with the current local year.
    # Computed once so every record of a single crawl shares the same year.
    # NOTE(review): items published in a previous year would get the wrong
    # year prefix - confirm the listing only shows current-year entries.
    yearPrefix = time.strftime('%Y') + '-'

    records = []
    for _ in range(itemCount):
        # divisionTarget yields the current item plus the remaining markup;
        # the remainder becomes the input for the next iteration.
        division = utils.divisionTarget(listContext,'<li>','</li>')
        listContext = division['nextContext']
        itemContext = division['targetContext']

        linkUrl = webNet + utils.filterContextByTarget(itemContext,'<a href="','">')
        title = utils.filterContextByTarget(itemContext,'">','</a>')
        pubDate = yearPrefix + utils.filterContextByTarget(itemContext,'[',']')
        descriptContext = crawDailyDescriptContext(linkUrl)

        records.append([
            str(uuid.uuid1()),
            linkUrl,
            title,
            pubDate,
            descriptContext,
            'STOCK',
            'HGNET',
        ])
    return records