Пример #1
 def updateShowDetail(self,s_id):
     """Fetch one show's detail page and print the fields scraped from it.

     The page URL is ``self.config.url`` joined with the link that
     ``Database.getOneLinkBySid`` returns for ``s_id``. The request goes
     through a cookie-aware ``urllib2`` opener.

     Args:
         s_id: Show identifier; passed to ``getOneLinkBySid`` and stored
             under the ``'s_id'`` key of the result.

     Returns:
         None. The collected dictionary is only printed.

     Raises:
         AttributeError / IndexError: if the page lacks the expected
             ``p.sumtext`` / ``aside.quikinfo`` elements or has fewer
             ``<li>`` entries than are indexed below.
     """
     db = Database(self.log,self.config)
     urlTarget = self.config.url+db.getOneLinkBySid(s_id)
     cookie = cookielib.CookieJar()
     opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
     req = urllib2.Request(
         url = urlTarget
     )
     htmlData = opener.open(req).read()
     if htmlData:
         bsContent = BeautifulSoup(htmlData)
         # The show's description text that we want to extract.
         pinfo = bsContent.find('p',attrs={'class':'sumtext'}).get_text()
         # Backslash-escape single quotes in the description.
         # NOTE(review): presumably for a hand-built SQL statement -- confirm.
         pinfo = pinfo.replace("'", "\\'")
         DivLarge = bsContent.find('aside',attrs={'class':'quikinfo'})
         DivSmall = DivLarge.findAll('li')
         update_time = DivSmall[0].a.get_text()
         # The fixed slice offsets below drop a leading part of each <li> text.
         # NOTE(review): presumably a fixed-width label prefix -- confirm
         # against the live page markup.
         length = DivSmall[1].get_text()
         length = length[17:]
         area = DivSmall[3].get_text()
         area = area[10:]
         channel = DivSmall[2].get_text()
         channel = channel[10:]
         status = DivSmall[5].get_text()
         status = status[14:]
         DetailOfShow = {
             's_id' : s_id,
             's_description' : pinfo,
             'update_time' : update_time,
             'length' : length,
             'area' : area,
             'channel' : channel,
             'status' : status
         }
         # Parenthesised single-argument form prints the same on Python 2.
         print(DetailOfShow)
Пример #2
    def workWithOneShowsEp(self,s_id):
        db = Database(self.log,self.config)
        urlTarget = self.config.url+db.getOneLinkBySid(s_id)
        cookie = cookielib.CookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
        req = urllib2.Request(
            url = urlTarget
        htmlData = ""
        htmlData = opener.open(req).read()
        if htmlData:
            bsContent = BeautifulSoup(htmlData)
            bsLists = bsContent.findAll('li',attrs = {'class':'parent'})
            print ('the len of biLists is '+str(len(bsLists)))
            for oneSeason in bsLists:
                #print oneSeason
                se_id = oneSeason.strong.get_text()
                se_id = re.search('Season\s\d{1,2}',se_id).group()
                se_id = se_id[7:]

                epList = oneSeason.findAll('li',attrs = {'class':'ep info  RAWR'}) + oneSeason.findAll('li',attrs = {'class':'ep info '})

                for oneEpisode in epList:
                    #print oneEpisode
                    e_num = oneEpisode.find('span',attrs = {'class':'pnumber'}).get_text()
                    if e_num[0] == '0' and len(e_num) > 1:
                        e_num = e_num[1:]
                    e_name = oneEpisode.find('a',attrs = {'itemprop':'url'}).get_text()
                    #e_name = MySQLdb.escape_string(e_name)

                    time_temp = oneEpisode.find('span',attrs = {'class':'datepub'})
                    e_time = time_temp['content']
                    time_temp = time_temp.get_text()
                    time = time_temp[-7:]

                    hour = re.search('\d{1,2}:',time).group()
                    hour = hour[:-1]
                    hour = string.atoi(hour)
                    minute = re.search(':\d{2}[a|p]m',time).group() 
                    if (minute[-2] == 'p') and (hour != 12):
                        hour += 12
                    minute = minute[1:-2]

                    if len(str(hour)) < 2:
                        hour = '0' + str(hour)
                    e_time += ' ' + str(hour) + ':' + minute + ':00'

                    status_temp = oneEpisode.find('span',attrs = {'class':'paired'})
                    if status_temp:
                        e_status = u'已播放'
                        e_status = u'即将播出'

                    episodeInfoToBeAired = {
                        's_id' : s_id,
                        'se_id' : se_id,
                        'e_num' : e_num,
                        'e_name' : e_name,
                        'e_status' : e_status,
                        'e_description' : '',
                        'e_time' : e_time

                    print episodeInfoToBeAired
                flag = True
                if flag == False: