Пример #1
0
 def video_page(self, response: HtmlResponse):
     """Parse a single watch page and yield a download item.

     The available quality list is recovered by executing the player's
     inline JavaScript with js2py; only the best (last) entry is used,
     and anything below 720p is dropped.  When SQL support is enabled,
     duplicates are logged and new records are persisted.
     """
     title = response.css('h1.title').css('span::text').get()
     channel = response.css('div.video-actions-container').css(
         'div.usernameWrap.clearfix').css('a::text').get()
     # Both the inline script and the video id hang off the #player node.
     player = response.css('div.video-wrapper').css('#player')
     raw_js = player.css('script').get()
     video_id = player.css('::attr(data-video-id)').get()
     # Strip the <script> wrapper and everything from the loader call on,
     # leaving only the quality-list definitions.
     snippet = raw_js.split('<script type="text/javascript">')[1]
     snippet = snippet.split('loadScriptUniqueId')[0]
     exec_js = '{0}\nqualityItems_{1};'.format(snippet, video_id)
     wrapped = js2py.eval_js(exec_js)  # type: js2py.base.JsObjectWrapper
     items = wrapped.to_list()  # type: list
     best = items[-1]
     quality = best['text'].split('p')[0]
     if int(quality) < 720:
         return
     video_url = best['url']
     self.logger.info('parse [%s] success, url: %s', title, video_url)
     if self.settings.get('ENABLE_SQL'):
         rows = self.data_base.select_all_by_title_my_follow(title)
         if rows:
             for row in rows:
                 self.logger.error('has duplicate record: %s', row)
         else:
             self.data_base.save_my_follow(title, channel, video_url,
                                           response.url)
     yield PornhubItem(file_urls=video_url,
                       file_name=title,
                       file_channel=channel)
Пример #2
0
 def content(self, response):
     """Parse the flashvars JSON embedded in a video page.

     Yields a single PornhubItem carrying the video title and the URL of
     the last media definition with a non-empty ``videoUrl``.
     """
     print(response)  # NOTE(review): debug output; prefer self.logger
     item = PornhubItem()
     page = Selector(response).extract()
     # Raw strings: '\{' in a plain string is an invalid escape sequence
     # (SyntaxWarning on modern Python).
     info = re.search(r'var flashvars(.*)=(.*?);\n', page).group()
     result = json.loads(re.findall(r'(\{.*?\});', info)[0])
     media_definitions = result.get('mediaDefinitions')
     # Each assignment overwrites the previous one, so the item ends up
     # with the LAST definition that has a non-empty URL (original
     # behavior, kept intentionally).
     for definition in media_definitions:
         video_url = definition['videoUrl']
         if video_url != '':
             item['file_urls'] = [video_url]
     item['name'] = result.get('video_title')
     yield item
Пример #3
0
    def parse_ph_info(self, response):
        """Parse the flashvars JSON from a video page into a PornhubItem.

        Extracted fields: duration, title, thumbnail image URL, canonical
        link URL and the 480p stream URL.
        """
        ph_item = PornhubItem()
        # BUG FIX: response.body is bytes in Scrapy/Python 3, and a str
        # regex pattern cannot be matched against bytes (TypeError); use
        # the decoded response.text instead.
        ph_info = re.findall(r'var flashvars =(.*?),\n', response.text)

        ph_info_json = json.loads(ph_info[0])
        ph_item['video_duration'] = ph_info_json.get('video_duration')
        ph_item['video_title'] = ph_info_json.get('video_title')
        ph_item['image_url'] = ph_info_json.get('image_url')
        ph_item['link_url'] = ph_info_json.get('link_url')
        ph_item['quality_480p'] = ph_info_json.get('quality_480p')
        yield ph_item
Пример #4
0
 def parse(self, response):
     """Yield one item per video card on a category listing page, then
     follow the next-page link until ``self.count`` reaches 20.
     """
     cards = response.css("#videoCategory .wrap")
     for card in cards:
         entry = PornhubItem()
         entry["imageUrl"] = card.css(
             "img::attr(data-thumb_url)").extract()[0]
         entry["linkUrl"] = ("https://www.pornhub.com" +
                             card.css(".title a::attr(href)").extract()[0])
         entry["name"] = card.css(".title a::text").extract()[0]
         entry["playNum"] = card.css(
             ".videoDetailsBlock var::text").extract()[0]
         entry["recommendation"] = card.css(
             ".videoDetailsBlock .value::text").extract()[0]
         entry["time"] = card.css(".duration::text").extract()[0]
         yield entry
     next_href = response.css(".page_next a::attr(href)").extract()[0]
     next_page = "https://www.pornhub.com" + next_href
     # Follow at most 20 additional listing pages.
     if self.count < 20:
         self.count += 1
         yield scrapy.Request(next_page, callback=self.parse)
Пример #5
0
 def parse_ph_info(self, response):
     """Extract the flashvars JSON from a video page and yield a
     PornhubItem with duration, title, image URL, link URL and the
     480p stream URL.
     """
     ph_item = PornhubItem()
     selector = Selector(response)
     # logging.info(selector)
     ph_info = re.findall(r'var flashvars =(.*?),\n', selector.extract())
     logging.debug('PH信息的JSON:')
     logging.debug(ph_info)
     ph_info_json = json.loads(ph_info[0])
     duration = ph_info_json.get('video_duration')
     ph_item['video_duration'] = duration
     title = ph_info_json.get('video_title')
     ph_item['video_title'] = title
     image_url = ph_info_json.get('image_url')
     ph_item['image_url'] = image_url
     link_url = ph_info_json.get('link_url')
     ph_item['link_url'] = link_url
     ph_item['quality_480p'] = ph_info_json.get('quality_480p')
     # BUG FIX: lazy %-style logging args; the original concatenated the
     # values with '+', which raises TypeError whenever the JSON returns
     # a non-str value (e.g. an integer duration).
     logging.info('duration:%s title:%s image_url:%s link_url:%s',
                  duration, title, image_url, link_url)
     yield ph_item
Пример #6
0
 def video_page(self, response: HtmlResponse):
     """Parse a video watch page.

     If a "Watch Full Video" button is present, follow it (unless it is a
     paid "Buy Full Video" button); otherwise resolve the media URL from
     the player's inline JavaScript and yield a PornhubItem, skipping
     low-quality (240p/480p) streams.
     """
     # some video has "Watch Full Video" button
     full_video_button = response.css("#trailerFullLengthDownload")
     video_title = response.css('h1.title').css('span::text').get()
     video_channel = response.css('div.video-actions-container').css(
         'div.usernameWrap.clearfix').css('a::text').get()
     if full_video_button:
         button_title = full_video_button.css('::attr(data-title)').get()
         if button_title != 'Buy Full Video':
             full_url = full_video_button.css('::attr(href)').get()
             self.logger.info('%s detected full video, original name: %s',
                              video_channel, video_title)
             # High priority: fetch the full version ahead of queued pages.
             yield scrapy.Request(full_url,
                                  callback=self.video_page,
                                  priority=100)
         else:
             self.logger.info('%s detected buy video, drop', video_channel)
     else:
         self.logger.info('get model: %s, title: %s', video_channel,
                          video_title)
         player_id_element = response.css('#player')
         js = player_id_element.css('script').get()
         data_video_id = player_id_element.css(
             '::attr(data-video-id)').get()
         # Strip the <script> wrapper and the loader call, keeping only
         # the quality-list definitions for evaluation.
         prepare_js = js.split('<script type="text/javascript">')[1].split(
             'loadScriptUniqueId')[0]
         exec_js = '{0}\nqualityItems_{1};'.format(prepare_js,
                                                   data_video_id)
         js_result = js2py.eval_js(
             exec_js)  # type: js2py.base.JsObjectWrapper
         quality_items = js_result.to_list()  # type: list
         quality = quality_items[-1]['text']
         # BUG FIX: the original `quality != '240p' or quality != '"480p"'`
         # was always True (an `or` of two inequalities, one with stray
         # embedded quotes), so the low-quality filter never fired.
         if quality not in ('240p', '480p'):
             video_url = quality_items[-1]['url']
             yield PornhubItem(file_urls=video_url,
                               file_name=video_title,
                               file_channel=video_channel,
                               parent_url=response.url)
Пример #7
0
    def video_content(self, response):
        """Parse one video page into a PornhubItem.

        Pulls title/view-count/tags/rating/thumbnail via XPath, rebuilds
        the screenshot URL list from the player's "urlPattern", and picks
        the best available stream URL (720p > 480p > 240p).  Extraction
        failures degrade to None fields; a failed stream lookup is
        appended to 'erorr_request.txt'.
        """
        tag, duration = response.meta.get('item')

        item = PornhubItem()

        link_url = response.url

        def first_or_none(query):
            # Defensive wrapper: any extraction failure degrades to None
            # instead of aborting the whole page.  `except Exception`
            # replaces the original bare `except:` so SystemExit /
            # KeyboardInterrupt still propagate.
            try:
                return response.xpath(query).get()
            except Exception:
                return None

        title = first_or_none('//span[@class="inlineFree"]/text()')
        count = first_or_none('//span[@class="count"]/text()')

        try:
            video_tags = ','.join(
                response.xpath(
                    '//div[@class="categoriesWrapper"]/a//text()').getall())
        except Exception:
            video_tags = None

        percent = first_or_none('//span[@class="percent"]/text()')
        img_url = first_or_none('//meta[@property="og:image"]/@content')

        # Screenshot URLs: the player JSON exposes a pattern such as
        # ".../S{N}.jpg" where N is the number of thumbnails; substitute
        # each index.  NOTE(review): range(1, num) skips index `num` —
        # kept as-is pending confirmation of the site's numbering scheme.
        try:
            pattern = re.findall(
                '"urlPattern":"(.*?)","thumbHeig', response.text)[0]
            num = int(re.findall(r'S{(\d+)}', pattern)[0])
            prefix = pattern.split('S{')[0]
            video_screenshot_imgs = [
                prefix + 'S{}.jpg'.format(i) for i in range(1, num)
            ]
        except Exception:
            video_screenshot_imgs = None

        # 1080p requires login cookies (rewrite the middleware to carry
        # them if needed); downloading needs proper headers or a 403 is
        # returned.
        # BUG FIX: video_url was previously left unbound (NameError at the
        # item assignment below) when no quality marker matched and no
        # exception fired; default it to None.
        video_url = None
        try:
            for quality in ('720', '480', '240'):
                marker = '"quality":"%s"' % quality
                if marker in response.text:
                    video_url = re.findall(marker + ',"videoUrl":"(.*?)"},',
                                           response.text, re.S | re.I)[0]
                    break
        except Exception:
            # BUG FIX: format via str() so a None title no longer raises
            # TypeError inside the error handler itself.
            with open('erorr_request.txt', 'a') as f:
                f.write('{},{}\n'.format(title, link_url))

        item['tag'] = tag
        item['duration'] = duration
        item['title'] = title
        item['link_url'] = link_url
        item['count'] = count
        item['video_tags'] = video_tags
        item['percent'] = percent
        item['img_url'] = img_url
        item['video_screenshot_imgs'] = video_screenshot_imgs
        item['video_url'] = video_url

        yield item