Пример #1
0
def crawDailyComments(link):
    """Collect comment entries from the FX678 listing page at ``link``.

    The region between ``<div class="list_content">`` and
    ``<div class="list_flip">`` is consumed as four consecutive pairs of
    segments.  In each pair the first segment (split from the "bggrey"
    marker to the plain marker) supplies the link, title and date; the
    second segment (split from the plain marker to the "bggrey" marker)
    supplies the description.

    NOTE(review): the exact slicing semantics of the
    ``HuiTongNetSpiderUtils`` helpers are not visible here -- confirm
    against that module before relying on the segment boundaries.

    Args:
        link: URL handed to ``HuiTongNetSpiderUtils.returnStartContext``.

    Returns:
        A list with one row per pair; each row is
        ``[uuid1 string, link URL, title, date, description,
        'FOREX', 'FX678']``.
    """
    utils = HuiTongNetSpiderUtils
    site_root = 'http://www.fx678.com'
    grey_marker = '<div class="list_content01 bggrey">'
    plain_marker = '<div class="list_content01 ">'

    page = utils.returnStartContext(link, '<div class="list_content">')
    remaining = utils.filterContextByTarget(
        page, '<div class="list_content">', '<div class="list_flip">')

    rows = []
    for _ in range(4):
        # First half of the pair: link, title and date.
        head_split = utils.divisionTargetIncludeContext(
            remaining, grey_marker, plain_marker)
        head_html = head_split['targetContext']
        remaining = head_split['nextContext']

        url = site_root + utils.filterContextByTarget(
            head_html, '<a href="', '.html"') + '.html'
        title = utils.filterContextByTarget(
            head_html, 'target="_blank">', '</a>')
        raw_date = utils.filterContextByTarget(
            head_html, '<div class="list_content01_titler">', ' </div>')
        # The extracted date is prefixed with the current local year and
        # its '/' separators are replaced by '-'.
        this_year = time.strftime('%Y')
        date_text = this_year + '-' + raw_date.replace('/', '-')

        # Second half of the pair: the description text.
        body_split = utils.divisionTargetIncludeContext(
            remaining, plain_marker, grey_marker)
        body_html = body_split['targetContext']
        remaining = body_split['nextContext']

        content_html = utils.filterAfterContext(
            body_html, '<div class="list_content01_content">')
        description = utils.removeSpecialCharacter(
            utils.filterContextByTarget(
                content_html, 'target="_blank">', '</a>'))

        rows.append([str(uuid.uuid1()), url, title, date_text,
                     description, 'FOREX', 'FX678'])
    return rows
Пример #2
0
def writeDailyComments():
    link = 'http://www.fx678.com/news/forex/scpl.html'
    currentList = crawDailyComments(link);
    conn = HuiTongNetSpiderUtils.getMySQLConn()
    cursor = conn.cursor()
    try:
        cursor.execute("DELETE  FROM  COMMENTS_NEWS_RESOURCE_TABLE  WHERE  SOURCEFLAG = 'FX678'")
        conn.commit()
    except conn.Error,e:
        print "Mysql Error %d: %s" % (e.args[0], e.args[1])
        conn.rollback()