Exemplo n.º 1
0
def writeCompanyNews():
    """Gather the company-news start page plus six follow-up links and write them.

    The start page is fetched through ThemeNewsSpiderUtils, narrowed to the
    markup after '<span class="current">1</span>' inside the
    '<div class="pageControl">' section (presumably the pager -- TODO
    confirm), and the next six '<a ...>' entries are turned into absolute
    URLs.  The resulting list (start page first) is passed to
    writeCompanyNewsByLink().
    """
    siteRoot = 'http://stock.stockstar.com'
    firstPage = 'http://stock.stockstar.com/list/company.htm'
    pageLinks = [firstPage]

    pagerHtml = ThemeNewsSpiderUtils.returnStartContext(firstPage, '<div class="pageControl">')
    remaining = ThemeNewsSpiderUtils.filterContextByTarget(
        pagerHtml, '<span class="current">1</span>', '</a></div>')

    # Exactly six anchors are consumed, one per pass.
    for _ in range(6):
        piece = ThemeNewsSpiderUtils.divisionTarget(remaining, '<a', '</a>')
        remaining = piece['nextContext']
        anchorHtml = piece['targetContext']
        href = ThemeNewsSpiderUtils.filterContextByTarget(
            anchorHtml, '<a href="', '" target="_self"')
        pageLinks.append(siteRoot + href)

    writeCompanyNewsByLink(pageLinks)
def writeCompanyNews():
    """Build the list of company-news pages to process and hand it to the writer.

    Starts from the fixed list page, cuts out the markup that follows the
    '<span class="current">1</span>' marker within '<div class="pageControl">',
    and converts the six anchors found there into absolute links.  The
    helper semantics come from ThemeNewsSpiderUtils and are not visible
    here.  Ends by calling writeCompanyNewsByLink() with the collected links.
    """
    host = 'http://stock.stockstar.com'
    startUrl = 'http://stock.stockstar.com/list/company.htm'
    collected = [startUrl]

    controlSection = ThemeNewsSpiderUtils.returnStartContext(
        startUrl, '<div class="pageControl">')
    unread = ThemeNewsSpiderUtils.filterContextByTarget(
        controlSection, '<span class="current">1</span>', '</a></div>')

    anchorsLeft = 6
    while anchorsLeft > 0:
        split = ThemeNewsSpiderUtils.divisionTarget(unread, '<a', '</a>')
        unread = split['nextContext']
        anchor = split['targetContext']
        relativeHref = ThemeNewsSpiderUtils.filterContextByTarget(
            anchor, '<a href="', '" target="_self"')
        collected.append(host + relativeHref)
        anchorsLeft -= 1

    writeCompanyNewsByLink(collected)
def crawCompanyNews(link):
    """Collect today's news rows from the '<div class="listnews">' list of ``link``.

    Items are read in page order.  The scan stops at the first item whose
    date (first 10 characters of its '<span>' text) is not today's local
    date, so this assumes the page lists newest items first -- TODO confirm.

    Args:
        link: page reference handed to ThemeNewsSpiderUtils.returnStartContext.

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, 'STOCKSTAR']`` lists,
        where ``keyid`` is a fresh ``uuid.uuid1()`` string.  Items with an
        empty link are skipped.
    """
    listHtml = ThemeNewsSpiderUtils.returnStartContext(
        link, '<div class="listnews">')
    remaining = ThemeNewsSpiderUtils.filterContextByTarget(
        listHtml, '<ul>', '</ul>')
    # Named itemCount rather than `len` so the builtin is not shadowed.
    itemCount = ThemeNewsSpiderUtils.findAllTarget(remaining, '<li')

    # Computed once so every item is compared against the same day, even if
    # the scan happens to run across midnight.
    today = time.strftime("%Y-%m-%d", time.localtime())

    currentList = []
    for _ in range(itemCount):
        piece = ThemeNewsSpiderUtils.divisionTarget(
            remaining, '<li>', '</li>')
        remaining = piece['nextContext']
        itemHtml = piece['targetContext']
        linkUrl = ThemeNewsSpiderUtils.filterContextByTarget(
            itemHtml, '<a href="', '">')
        pubDate = ThemeNewsSpiderUtils.filterContextByTarget(
            itemHtml, '<span>', '</span>')
        title = ThemeNewsSpiderUtils.filterContextByTarget(
            itemHtml, '">', '</a>')
        if pubDate[:10] != today:
            break
        if linkUrl != '':
            # The id is only generated for rows that are actually kept.
            keyid = str(uuid.uuid1())
            currentList.append([keyid, linkUrl, pubDate, title, 'STOCKSTAR'])
    return currentList
Exemplo n.º 4
0
def writeCompanyNewsByLink(currentLinkList):
    """Delete every row of STOCK_POOL_IMPORTANT_NEWS_TABLE and commit.

    On a database error the transaction is rolled back and the error is
    printed; the exception is not re-raised.

    Args:
        currentLinkList: accepted for the caller's sake; the visible body
            does not read it.
    """
    conn = ThemeNewsSpiderUtils.getMySQLConn()
    cursor = conn.cursor()
    try:
        cursor.execute("DELETE FROM STOCK_POOL_IMPORTANT_NEWS_TABLE")
        conn.commit()
    except conn.Error as e:
        # Roll back before reporting so a problem while formatting the
        # message cannot leave the transaction open.
        conn.rollback()
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        # NOTE(review): only the cursor is released here; whether the
        # connection from getMySQLConn() may be closed is not visible.
        cursor.close()
def writeCompanyNewsByLink(currentLinkList):
    """Clear STOCK_POOL_IMPORTANT_NEWS_TABLE inside a single transaction.

    Runs an unconditional DELETE and commits it.  If the driver raises
    ``conn.Error`` the work is rolled back and the error code and message
    are printed; nothing is re-raised.

    Args:
        currentLinkList: not used by the code shown in this function.
    """
    conn = ThemeNewsSpiderUtils.getMySQLConn()
    cursor = conn.cursor()
    try:
        cursor.execute("DELETE FROM STOCK_POOL_IMPORTANT_NEWS_TABLE")
        conn.commit()
    except conn.Error as e:
        # Undo first; reporting comes second so it can never block the rollback.
        conn.rollback()
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        # The cursor is local to this call, so it is always safe to close.
        # NOTE(review): connection ownership is unknown here, so it stays open.
        cursor.close()
Exemplo n.º 6
0
def writeThemeDailyNewsByLink(currentLinkList):
    """Delete the rows of STOCK_POOL_THEME_NEWS_TABLE whose SOURCEFLAG is 'STOCKNET'.

    The delete is committed on success.  On a database error it is rolled
    back and the error is printed; the exception is not re-raised.

    Args:
        currentLinkList: accepted for the caller's sake; the visible body
            does not read it.
    """
    conn = ThemeNewsSpiderUtils.getMySQLConn()
    cursor = conn.cursor()

    try:
        cursor.execute("DELETE FROM STOCK_POOL_THEME_NEWS_TABLE WHERE SOURCEFLAG = 'STOCKNET'")
        conn.commit()
    except conn.Error as e:
        # Roll back before reporting so a problem while formatting the
        # message cannot leave the transaction open.
        conn.rollback()
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        # NOTE(review): only the cursor is released here; whether the
        # connection from getMySQLConn() may be closed is not visible.
        cursor.close()
Exemplo n.º 7
0
def writeThemeDailyNewsByLink(currentLinkList):
    """Remove all 'STOCKNET' rows from STOCK_POOL_THEME_NEWS_TABLE.

    Executes the DELETE and commits it.  If the driver raises
    ``conn.Error`` the transaction is rolled back and the error code and
    message are printed; nothing is re-raised.

    Args:
        currentLinkList: not used by the code shown in this function.
    """
    conn = ThemeNewsSpiderUtils.getMySQLConn()
    cursor = conn.cursor()

    try:
        cursor.execute(
            "DELETE FROM STOCK_POOL_THEME_NEWS_TABLE WHERE SOURCEFLAG = 'STOCKNET'"
        )
        conn.commit()
    except conn.Error as e:
        # Undo first; reporting comes second so it can never block the rollback.
        conn.rollback()
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        # The cursor is local to this call, so it is always safe to close.
        # NOTE(review): connection ownership is unknown here, so it stays open.
        cursor.close()
Exemplo n.º 8
0
def crawThemeDailyNews(link):
    """Collect today's news rows from the '<div class="listnews">' list of ``link``.

    Items are read in page order.  The scan stops at the first item whose
    date (first 10 characters of its "<span>" text) differs from today's
    local date; presumably the page is sorted newest first -- TODO confirm.

    Args:
        link: page reference handed to ThemeNewsSpiderUtils.returnStartContext.

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, "STOCKNET"]`` lists,
        where ``keyid`` is a fresh ``uuid.uuid1()`` string.  Items with an
        empty link are skipped.
    """
    listHtml = ThemeNewsSpiderUtils.returnStartContext(link, '<div class="listnews">')
    remaining = ThemeNewsSpiderUtils.filterContextByTarget(listHtml, "<ul>", "</ul>")
    # Named itemCount rather than `len` so the builtin is not shadowed.
    itemCount = ThemeNewsSpiderUtils.findAllTarget(remaining, "<li")

    # One date for the whole scan: items are judged against the same day
    # even if the loop runs across midnight.
    today = time.strftime("%Y-%m-%d", time.localtime())

    currentList = []
    for _ in range(itemCount):
        piece = ThemeNewsSpiderUtils.divisionTarget(remaining, "<li>", "</li>")
        remaining = piece["nextContext"]
        itemHtml = piece["targetContext"]
        linkUrl = ThemeNewsSpiderUtils.filterContextByTarget(itemHtml, '<a href="', '">')
        pubDate = ThemeNewsSpiderUtils.filterContextByTarget(itemHtml, "<span>", "</span>")
        title = ThemeNewsSpiderUtils.filterContextByTarget(itemHtml, '">', "</a>")
        if pubDate[:10] != today:
            break
        if linkUrl != "":
            # The id is only generated for rows that are actually kept.
            keyid = str(uuid.uuid1())
            currentList.append([keyid, linkUrl, pubDate, title, "STOCKNET"])
    return currentList
Exemplo n.º 9
0
def crawCompanyNews(link):
    """Return the rows for today's items in the '<div class="listnews">' list.

    The list items of the page at ``link`` are walked in order and the walk
    ends at the first item not dated today (local date, compared with the
    first 10 characters of the item's '<span>' text).  This relies on the
    page being ordered newest first -- TODO confirm.

    Args:
        link: page reference handed to ThemeNewsSpiderUtils.returnStartContext.

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, 'STOCKSTAR']`` lists;
        ``keyid`` is a new ``uuid.uuid1()`` string per kept item.  Items
        whose link is empty are left out.
    """
    listHtml = ThemeNewsSpiderUtils.returnStartContext(link, '<div class="listnews">')
    remaining = ThemeNewsSpiderUtils.filterContextByTarget(listHtml, '<ul>', '</ul>')
    # Avoid the name `len`: it would hide the builtin for the rest of the body.
    itemCount = ThemeNewsSpiderUtils.findAllTarget(remaining, '<li')

    # Loop-invariant, so it is taken once; this also keeps the cut-off date
    # stable if the scan runs across midnight.
    today = time.strftime("%Y-%m-%d", time.localtime())

    currentList = []
    for _ in range(itemCount):
        piece = ThemeNewsSpiderUtils.divisionTarget(remaining, '<li>', '</li>')
        remaining = piece['nextContext']
        itemHtml = piece['targetContext']
        linkUrl = ThemeNewsSpiderUtils.filterContextByTarget(itemHtml, '<a href="', '">')
        pubDate = ThemeNewsSpiderUtils.filterContextByTarget(itemHtml, '<span>', '</span>')
        title = ThemeNewsSpiderUtils.filterContextByTarget(itemHtml, '">', '</a>')
        if pubDate[:10] != today:
            break
        if linkUrl != '':
            # Ids are created only for rows that end up in the result.
            keyid = str(uuid.uuid1())
            currentList.append([keyid, linkUrl, pubDate, title, 'STOCKSTAR'])
    return currentList
Exemplo n.º 10
0
def _crawTacticNewsList(link, listMarker, newsFlag):
    """Return one row per linked '<li>' item of the list starting at ``listMarker``."""
    listHtml = ThemeNewsSpiderUtils.returnStartContext(link, listMarker)
    remaining = ThemeNewsSpiderUtils.filterContextByTarget(listHtml, '<ul>', '</ul>')
    # Named itemCount rather than `len` so the builtin is not shadowed.
    itemCount = ThemeNewsSpiderUtils.findAllTarget(remaining, '<li')
    rows = []
    for _ in range(itemCount):
        piece = ThemeNewsSpiderUtils.divisionTarget(remaining, '<li>', '</li>')
        remaining = piece['nextContext']
        itemHtml = piece['targetContext']
        linkUrl = ThemeNewsSpiderUtils.filterContextByTarget(itemHtml, '<a href="', '">')
        pubDate = ThemeNewsSpiderUtils.filterContextByTarget(itemHtml, '<span>', '</span>')
        title = ThemeNewsSpiderUtils.filterContextByTarget(itemHtml, '">', '</a>')
        if linkUrl != '':
            # The id is only generated for rows that are actually kept.
            rows.append([str(uuid.uuid1()), linkUrl, pubDate, title, newsFlag])
    return rows


def crawCompanyNews(link):
    """Collect the items of both tactic-news lists on the page at ``link``.

    The list marked id="TacticNewsList1" is read first and its rows are
    flagged 'good'; the list marked id="TacticNewsList2" is read second and
    its rows are flagged 'bad'.

    Args:
        link: page reference handed to ThemeNewsSpiderUtils.returnStartContext.

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, newsFlag]`` lists, all
        'good' rows followed by all 'bad' rows.  ``keyid`` is a fresh
        ``uuid.uuid1()`` string; items with an empty link are skipped.
    """
    currentList = _crawTacticNewsList(
        link, '<div class="listnews" id="TacticNewsList1" >', 'good')
    currentList.extend(_crawTacticNewsList(
        link, '<div class="listnews" id="TacticNewsList2"  style="display:none;">', 'bad'))
    return currentList
def _collectFlaggedNews(link, sectionMarker, newsFlag):
    """Scan the '<ul>' that follows ``sectionMarker`` and tag each kept row with ``newsFlag``."""
    sectionHtml = ThemeNewsSpiderUtils.returnStartContext(link, sectionMarker)
    unread = ThemeNewsSpiderUtils.filterContextByTarget(
        sectionHtml, '<ul>', '</ul>')
    # Kept out of the name `len` so the builtin stays usable.
    entryCount = ThemeNewsSpiderUtils.findAllTarget(unread, '<li')
    collected = []
    for _ in range(entryCount):
        split = ThemeNewsSpiderUtils.divisionTarget(unread, '<li>', '</li>')
        unread = split['nextContext']
        entryHtml = split['targetContext']
        linkUrl = ThemeNewsSpiderUtils.filterContextByTarget(
            entryHtml, '<a href="', '">')
        pubDate = ThemeNewsSpiderUtils.filterContextByTarget(
            entryHtml, '<span>', '</span>')
        title = ThemeNewsSpiderUtils.filterContextByTarget(
            entryHtml, '">', '</a>')
        if linkUrl != '':
            # Ids are created only for rows that end up in the result.
            keyid = str(uuid.uuid1())
            collected.append([keyid, linkUrl, pubDate, title, newsFlag])
    return collected


def crawCompanyNews(link):
    """Return the rows of the 'good' and 'bad' news lists found at ``link``.

    Two sections of the page are scanned in this order:
    the one marked id="TacticNewsList1" (rows flagged 'good') and the one
    marked id="TacticNewsList2" (rows flagged 'bad').

    Args:
        link: page reference handed to ThemeNewsSpiderUtils.returnStartContext.

    Returns:
        A list of ``[keyid, linkUrl, pubDate, title, newsFlag]`` lists with
        the 'good' rows first.  ``keyid`` is a new ``uuid.uuid1()`` string
        per row; entries whose link is empty are left out.
    """
    currentList = []
    sections = (
        ('<div class="listnews" id="TacticNewsList1" >', 'good'),
        ('<div class="listnews" id="TacticNewsList2"  style="display:none;">', 'bad'),
    )
    for sectionMarker, newsFlag in sections:
        currentList.extend(_collectFlaggedNews(link, sectionMarker, newsFlag))
    return currentList