Exemplo n.º 1
0
    def InsData(self, url, newskind, DateDif):
        """Fetch one news page and insert it into ``tbl_news_info``.

        The page is downloaded with ``Func.GetHeader``, parsed with ``bs``
        (presumably BeautifulSoup -- confirm against the file's imports) and
        written as a single row when its publication date matches the
        requested day.

        Args:
            url: Address of the article page; also stored in ``newsurl``.
            newskind: Key (1, 2 or 3) into the category map that selects the
                value stored in the ``typename`` column.
            DateDif: Number of days subtracted from "now" to obtain the day
                the article must have been published on. ``-1`` disables the
                date check so the article is always stored.

        Returns:
            None. Database errors raised while inserting are printed, not
            re-raised.

        Raises:
            AttributeError: If an expected element is missing from the page
                (the lookup then yields ``None``).
            ValueError: If the timestamp text is not ``%Y-%m-%d %H:%M``.
            KeyError: If the article is to be stored and ``newskind`` is not
                a key of the category map.
        """
        res = Func.GetHeader(url)
        if res == '':
            print('Time Out!')
            return

        soup = bs(res, 'html.parser')

        title = soup.find('h2', attrs={'class': 'news_title'}).text

        timestr = soup.find('span', attrs={'class': 'gray9 mgl10'}).text
        newstime = datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M')

        # Source, comment count and author are not extracted from this page;
        # fixed defaults are stored instead.
        source = ''
        cmt = 0
        author = ''

        newstxt = soup.find('div', attrs={'class': 'newsDetail_txt'}).text.strip()

        # Only keep articles published on the requested day, unless the
        # caller passed -1 to switch the date filter off.
        wanted = datetime.datetime.now() - datetime.timedelta(days=DateDif)
        if wanted.date() != newstime.date() and DateDif != -1:
            return

        # NOTE(review): the third label differs from the "七乐彩" spelling
        # used elsewhere -- confirm which one is intended.
        dictypename = {1: '双色球', 2: '福彩3D', 3: "七彩乐"}

        # Insert data. The values are scraped from a web page, so they are
        # passed as query parameters and never spliced into the SQL text.
        sql = ('insert into tbl_news_info(title,newstype,source,pubtime,'
               'typename,webname,newsurl,contenttxt,joincount,author) '
               'values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')
        # Built before connecting so that a bad newskind cannot leak a
        # connection.
        params = (title, '其它', source, newstime, dictypename[newskind],
                  '澳客网', url, newstxt, cmt, author)

        # NOTE(review): connection settings, including the password, are
        # hard-coded here; consider moving them to configuration.
        conn = MySQLdb.connect(host='.',
                               port=3306,
                               user='******',
                               passwd='123456',
                               db='news_info',
                               charset='utf8')
        try:
            cur = conn.cursor()
            try:
                cur.execute(sql, params)
                conn.commit()
            except Exception as e:
                # Best effort: report the failure and carry on with the next
                # page instead of aborting the whole crawl.
                print('str(Exception):\t%s' % (str(Exception),))
                print('str(e):\t\t%s' % (str(e),))
                print('repr(e):\t%s' % (repr(e),))
                print('e.message:\t%s' % (getattr(e, 'message', ''),))
                print('traceback.print_exc():')
                traceback.print_exc()
                print('traceback.format_exc():\n%s' % traceback.format_exc())
                print('url:%s' % url)
                print('########################################################')
            finally:
                cur.close()
        finally:
            # Always release the connection, even when the insert failed.
            conn.close()
Exemplo n.º 2
0
    def InsData(self, url, newskind, newstype, DateDif):
        """Fetch one news page and insert it into ``tbl_news_info``.

        The page is downloaded with ``Func.GetHeader``, parsed with ``bs``
        (presumably BeautifulSoup -- confirm against the file's imports) and
        written as a single row when its publication date matches the
        requested day.

        Args:
            url: Address of the article page; also stored in ``newsurl``.
            newskind: Section key (1-4); selects the ``typename`` value and
                the set of sub-types ``newstype`` is looked up in.
            newstype: Sub-type key within the chosen section; selects the
                value stored in the ``newstype`` column.
            DateDif: Number of days subtracted from "now" to obtain the day
                the article must have been published on. ``-1`` disables the
                date check so the article is always stored.

        Returns:
            None. Database errors raised while inserting are printed, not
            re-raised.

        Raises:
            AttributeError: If an expected element is missing from the page
                (the lookup then yields ``None``).
            IndexError: If the time/source block has fewer than two spans.
            ValueError: If the timestamp text is not ``%Y-%m-%d %H:%M``.
            KeyError: If the article is to be stored and ``newskind`` or
                ``newstype`` is not a known key.
        """
        res = Func.GetHeader(url)
        if res == '':
            print('Time Out!')
            return

        soup = bs(res, 'html.parser')

        # attrs must be a dict; the previous set literal {'class', 'w700 fl'}
        # only matched by accident.
        soupinfo = soup.find('div', attrs={'class': 'w700 fl'})
        title = soupinfo.find('dt').text.strip()

        # First span holds the publication time, second the source; strip
        # their labels.
        infolist = soup.find('dd', attrs={'class': 'dd_time'})('span')
        timestr = infolist[0].text.replace('发布时间:', '')
        source = infolist[1].text.replace('来源:', '')

        newstime = datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M')

        # Article body: text of every paragraph, concatenated with no
        # separator.
        soupcon = soup.find('dd', attrs={'class': 'dd_content'})
        newstxt = ''.join(con.text.strip() for con in soupcon('p'))

        # Comment count and author are not extracted from this page; fixed
        # defaults are stored instead.
        cmt = 0
        author = ''

        # Only keep articles published on the requested day, unless the
        # caller passed -1 to switch the date filter off.
        wanted = datetime.datetime.now() - datetime.timedelta(days=DateDif)
        if wanted.date() != newstime.date() and DateDif != -1:
            return

        dictypename = {1: '新闻中心', 2: '福彩公益', 3: '彩站风采', 4: '彩友之家'}
        # Sub-type labels per section. A lookup table makes an unknown
        # newskind fail with a clear KeyError rather than an unbound local.
        subtypes = {
            1: {1: '福彩要闻', 2: '公告公示', 3: '福彩活动'},
            2: {1: '公益新闻', 2: '公益活动', 3: '政策法规', 4: '公益掠影'},
            3: {1: '福彩动态', 2: '江城名站', 3: '站长之星'},
            4: {
                1: '江城彩讯',
                2: '双色球',
                3: '福彩3d',
                4: '七乐彩',
                5: '30选5',
                6: '快3',
                7: '刮刮乐',
                8: '无纸化',
            },
        }

        # Insert data. The values are scraped from a web page, so they are
        # passed as query parameters and never spliced into the SQL text.
        sql = ('insert into tbl_news_info(title,newstype,source,pubtime,'
               'typename,webname,newsurl,contenttxt,joincount,author) '
               'values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)')
        # Built before connecting so that a bad key cannot leak a connection.
        params = (title, subtypes[newskind][newstype], source, newstime,
                  dictypename[newskind], '快乐福彩', url, newstxt, cmt, author)

        # NOTE(review): connection settings, including the password, are
        # hard-coded here; consider moving them to configuration.
        conn = MySQLdb.connect(host='.',
                               port=3306,
                               user='******',
                               passwd='123456',
                               db='news_info',
                               charset='utf8')
        try:
            cur = conn.cursor()
            try:
                cur.execute(sql, params)
                conn.commit()
            except Exception as e:
                # Best effort: report the failure and carry on with the next
                # page instead of aborting the whole crawl.
                print('str(Exception):\t%s' % (str(Exception),))
                print('str(e):\t\t%s' % (str(e),))
                print('repr(e):\t%s' % (repr(e),))
                print('e.message:\t%s' % (getattr(e, 'message', ''),))
                print('traceback.print_exc():')
                traceback.print_exc()
                print('traceback.format_exc():\n%s' % traceback.format_exc())
                print('url:%s' % url)
                print('########################################################')
            finally:
                cur.close()
        finally:
            # Always release the connection, even when the insert failed.
            conn.close()