def get_scenes(self, response):
    """Build FemJoy scene items from a JSON listing response.

    Parses ``response.text`` as JSON and turns every entry of its
    ``results`` list into a ``SceneItem``.

    Args:
        response: Response whose ``text`` is a JSON document containing a
            ``results`` list.

    Returns:
        list: Items whose ISO date string sorts after the cutoff derived
        from ``self.days``. When ``self.debug`` is truthy nothing is
        collected; every item is printed instead (tagged with ``filtered``
        when it would have been dropped) and the list stays empty.
    """
    # The cutoff is identical for every entry, so compute it once up front.
    days = int(self.days)
    if days > 27375:
        # Sorts before any real date string, which disables the filter.
        filterdate = "0000-00-00"
    else:
        filterdate = (date.today() - timedelta(days)).strftime('%Y-%m-%d')

    itemlist = []
    for jsonentry in json.loads(response.text)['results']:
        item = SceneItem()
        item['performers'] = [
            model['name'].title() for model in jsonentry['actors']
        ]
        item['title'] = self.cleanup_title(jsonentry['title'])
        item['description'] = self.cleanup_description(
            jsonentry['long_description']) or ''
        # Normalise any falsy thumbnail value (e.g. '') to None.
        item['image'] = jsonentry['thumb']['image'] or None
        item['image_blob'] = None
        item['id'] = jsonentry['id']
        item['trailer'] = ''
        item['url'] = "https://femjoy.com" + jsonentry['url']
        item['date'] = self.parse_date(
            jsonentry['release_date'].strip()).isoformat()
        item['site'] = "FemJoy"
        item['parent'] = "FemJoy"
        item['network'] = "FemJoy"
        item['tags'] = []

        # Dates are compared as strings; both sides start with YYYY-MM-DD.
        if self.debug:
            if item['date'] <= filterdate:
                item['filtered'] = "Scene filtered due to date restraint"
            print(item)
        elif item['date'] > filterdate:
            itemlist.append(item)

    return itemlist
def get_scenes(self, response):
    """Extract Sins Life scenes from a listing page.

    Each ``<div class="item ">`` on the page becomes one ``SceneItem``.
    The page exposes no per-scene URL or date here, so the item URL is the
    listing URL, the date is whatever ``self.parse_date('today')`` yields,
    and the id is derived from the cleaned title.

    Args:
        response: Response supporting ``xpath()`` and exposing ``url``.

    Returns:
        list: One item per scene block that has title text and ends up
        with a non-empty id, title and date.
    """
    scenelist = []
    for scene in response.xpath('//div[@class="item "]'):
        title = scene.xpath(
            './/div[contains(@class,"item-title")]/a/text()').get()
        if not title:
            # Both title and id come from this text. Without it the item
            # could never pass the final check, and reading the unset
            # fields would raise KeyError.
            continue

        item = SceneItem()
        item['performers'] = []
        item['tags'] = []
        item['trailer'] = ''
        item['image'] = ''
        item['description'] = ''
        item['network'] = "Sins Life"
        item['parent'] = "Sins Life"
        item['site'] = "Sins Life"

        item['title'] = self.cleanup_title(title)
        # Only ASCII letters, digits and hyphens survive the substitution,
        # so no whitespace is left to strip or replace afterwards.
        item['id'] = re.sub('[^a-zA-Z0-9-]', '', item['title']).lower()
        item['url'] = response.url

        description = scene.xpath(
            './/div[@class="item-meta"]/div/text()').getall()
        if description:
            description = " ".join(description)
            # NOTE(review): as written this swaps a space for a space and
            # changes nothing; it may once have targeted a double or
            # non-breaking space. Confirm the intended character.
            description = description.replace(" ", " ")
            item['description'] = self.cleanup_description(description)

        item['date'] = self.parse_date('today').isoformat()

        # Prefer the high-resolution attribute, fall back to plain src.
        image = (scene.xpath('.//img/@src0_3x').get()
                 or scene.xpath('.//img/@src').get())
        item['image'] = image.strip() if image else None
        item['image_blob'] = None

        if item['id'] and item['title'] and item['date']:
            scenelist.append(item)

    return scenelist
def get_scenes(self, response):
    """Yield one thumbnail-lookup request per video in a GraphQL listing.

    For every edge under ``data.video.list.result.edges`` of the JSON
    response a ``SceneItem`` is populated, a copy of it is stored in the
    response's meta dict under ``item``, and a POST request asking for the
    scene's ``videothumb.jpg`` asset is yielded with ``self.get_images``
    as its callback.

    Args:
        response: Response exposing ``meta`` and ``json()``.

    Yields:
        Request: One GraphQL POST request per listed video.
    """
    asset_query = "query BatchFindAssetQuery($paths: [String!]!) {\n asset {\n batch(input: {paths: $paths}) {\n result {\nserve {\n uri\n}\n}\n}\n}\n}\n"
    graphql_url = "https://thicc18.team18.app/graphql"

    request_meta = response.meta
    edges = response.json()['data']['video']['list']['result']['edges']
    for edge in edges:
        node = edge['node']
        video_id = node['videoId']

        scene = SceneItem()
        scene['id'] = video_id.replace(":", "-")
        scene['title'] = self.cleanup_title(node['title'])
        scene['description'] = self.cleanup_description(
            node['description']['long'])
        scene['performers'] = [
            entry['talent']['name'] for entry in node['talent']
        ]
        scene['site'] = "Thicc 18"
        scene['network'] = "Thicc 18"
        scene['parent'] = "Thicc 18"
        scene['url'] = ("https://thicc18.com/videos/"
                        + video_id.replace(':', '%3A'))
        scene['date'] = self.parse_date('today').isoformat()
        scene['trailer'] = ''
        scene['tags'] = ['Big Ass']
        request_meta['item'] = scene.copy()

        # videoId is split on ":"; the first two parts locate the thumbnail.
        id_parts = video_id.split(":")
        thumb_path = ("/members/models/" + id_parts[0] + "/scenes/"
                      + id_parts[1] + "/videothumb.jpg")
        payload = json.dumps({
            "operationName": "BatchFindAssetQuery",
            "variables": {
                "paths": [
                    thumb_path,
                ]
            },
            "query": asset_query,
        })
        yield Request(graphql_url,
                      headers=self.headers,
                      body=payload,
                      method="POST",
                      callback=self.get_images,
                      meta=request_meta)
def parse_scenepage(self, response):
    """Build Teen Core Club scene items from a JSON listing response.

    Every entry of the response's ``data`` list becomes one ``SceneItem``.
    Scene URLs are built from the scheme and host of ``response.url``, and
    the site name is taken from ``response.meta['site']``.

    Args:
        response: Response exposing ``text`` (JSON), ``url`` and ``meta``.

    Returns:
        list: Items whose ISO date string sorts after the cutoff derived
        from ``self.days``. When ``self.debug`` is truthy nothing is
        collected; every item is printed instead (tagged with ``filtered``
        when it would have been dropped) and the list stays empty.
    """
    meta = response.meta
    parsed_uri = urlparse(response.url)
    domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)

    # The cutoff is identical for every entry, so compute it once up front.
    days = int(self.days)
    if days > 27375:
        # Sorts before any real date string, which disables the filter.
        filterdate = "0000-00-00"
    else:
        filterdate = (date.today() - timedelta(days)).strftime('%Y-%m-%d')

    itemlist = []
    for jsonentry in json.loads(response.text)['data']:
        item = SceneItem()

        # One "actor" entry may hold several names joined by "+" or "&".
        # Splitting a name without "&" yields the name itself, so a single
        # code path covers both cases.
        performers = []
        for model in jsonentry['actors']:
            combined = model['name'].replace("+", "&").strip()
            performers.extend(
                star.strip().title() for star in combined.split("&"))
        item['performers'] = performers

        item['title'] = jsonentry['title_en']
        # Prefix one-word titles with the performer names.
        if len(re.findall(r'\w+', item['title'])) == 1 and performers:
            item['title'] = (", ".join(performers)
                             + " (" + item['title'] + ")")

        item['description'] = jsonentry['description_en'] or ''

        # An empty screenshot list must not raise IndexError.
        screenshots = jsonentry['screenshots']
        first_shot = screenshots[0] if screenshots else None
        if isinstance(first_shot, str):
            # "https:" is prepended as-is to the stored value.
            item['image'] = "https:" + first_shot
        else:
            item['image'] = None
        item['image_blob'] = None

        item['id'] = jsonentry['id']
        item['trailer'] = ''
        item['url'] = (domain + "video/" + str(jsonentry['id'])
                       + "/" + jsonentry['slug'])

        # Pick the fallback before parsing: isoformat() never returns an
        # empty string, so testing the parsed result could never trigger it.
        raw_date = (jsonentry.get('publication_start') or '').strip()
        if not raw_date:
            raw_date = jsonentry['created_at'].strip()
        item['date'] = self.parse_date(raw_date).isoformat()

        item['site'] = meta['site']
        item['parent'] = "Teen Core Club"
        item['network'] = "Teen Core Club"
        item['tags'] = []

        # Dates are compared as strings; both sides start with YYYY-MM-DD.
        if self.debug:
            if item['date'] <= filterdate:
                item['filtered'] = "Scene filtered due to date restraint"
            print(item)
        elif item['date'] > filterdate:
            itemlist.append(item)

    return itemlist
def parse_scenepage(self, response):
    """Extract XXX Horror scenes from a blog-style listing page.

    Every ``<article>`` whose class contains ``post`` becomes one
    ``SceneItem``. The id is the last path segment of the post URL;
    articles for which no id can be derived are ignored.

    Args:
        response: Response supporting ``xpath()``.

    Returns:
        list: Items with an id whose date string sorts after the cutoff
        derived from ``self.days``. When ``self.debug`` is truthy nothing
        is collected; items with an id are printed instead (tagged with
        ``filtered`` when they would have been dropped).
    """
    # The cutoff is identical for every article, so compute it once.
    days = int(self.days)
    if days > 27375:
        # Sorts before any real date string, which disables the filter.
        filterdate = "0000-00-00"
    else:
        filterdate = (date.today() - timedelta(days)).strftime('%Y-%m-%d')

    scenelist = []
    for scene in response.xpath('//article[contains(@class,"post")]'):
        item = SceneItem()
        item['performers'] = []
        item['title'] = ''
        item['id'] = ''

        title = scene.xpath('./header/h2/a/text()').get()
        if title:
            item['title'] = self.cleanup_title(title)

        url = scene.xpath('./header/h2/a/@href').get()
        if url:
            item['url'] = url.strip()
            # The id is the final path segment of a URL ending in "/".
            # A URL of another shape leaves the id empty instead of
            # crashing on a failed match.
            id_match = re.search(r'.*/(.*?)/$', item['url'])
            if id_match and id_match.group(1):
                item['id'] = id_match.group(1).strip()

        scenedate = scene.xpath(
            './/time[contains(@class,"published")]/@datetime').get()
        if scenedate:
            # The datetime attribute is stored as found, without parsing.
            scenedate = scenedate.strip()
        else:
            # Fall back to today's date rather than parsing the missing
            # value itself.
            scenedate = self.parse_date('today').isoformat()
        item['date'] = scenedate

        description = scene.xpath(
            './/div[@class="entry-content"]/p/text()').getall()
        if not description:
            description = scene.xpath(
                './/div[@class="entry-content"]//img/following-sibling::text()'
            ).getall()
        if description:
            description = " ".join(part.strip() for part in description)
            item['description'] = self.cleanup_description(description)
        else:
            item['description'] = ''

        image = scene.xpath(
            './/div[@class="entry-content"]/figure//img/@src').get()
        if not image:
            image = scene.xpath(
                './/img[contains(@src,"uploads")]/@src').get()
        item['image'] = image.strip() if image else None
        item['image_blob'] = None

        # Category links are used as performer names, tag links as tags.
        item['performers'] = [
            name.strip() for name in scene.xpath(
                './/span[@class="cat-links"]/a/text()').getall()
        ]
        item['tags'] = [
            tag.strip().title() for tag in scene.xpath(
                './/span[@class="tags-links"]/a/text()').getall()
        ]

        item['trailer'] = ''
        item['parent'] = "XXX Horror"
        item['network'] = "XXX Horror"
        item['site'] = "XXX Horror"

        if not item['id']:
            continue

        # Dates are compared as strings; both sides start with YYYY-MM-DD.
        if self.debug:
            if item['date'] <= filterdate:
                item['filtered'] = "Scene filtered due to date restraint"
            print(item)
        elif item['date'] > filterdate:
            scenelist.append(item)

    return scenelist
def get_scenes(self, response):
    """Extract Arch Angel scenes from a listing page.

    Every ``<div class="item-episode">`` becomes one ``SceneItem``. The id
    is taken from the scene link (the part between the last ``/`` and
    ``.html``); scenes without a usable id are dropped.

    Args:
        response: Response supporting ``xpath()``.

    Returns:
        list: Items with title and id whose ISO date string sorts after
        the cutoff derived from ``self.days``. When ``self.debug`` is
        truthy nothing is collected; qualifying items are printed instead
        (tagged with ``filtered`` when they would have been dropped).
    """
    # The cutoff is identical for every scene, so compute it once.
    days = int(self.days)
    if days > 27375:
        # Sorts before any real date string, which disables the filter.
        filterdate = "0000-00-00"
    else:
        filterdate = (date.today() - timedelta(days)).strftime('%Y-%m-%d')

    scenelist = []
    for scene in response.xpath('//div[@class="item-episode"]'):
        item = SceneItem()
        item['site'] = "Arch Angel"
        item['parent'] = "Arch Angel"
        item['network'] = "Arch Angel"

        title = scene.xpath('.//h3/a/text()')
        if title:
            item['title'] = self.cleanup_title(title.get())
        else:
            item['title'] = 'No Title Available'

        scenedate = scene.xpath(
            './/strong[contains(text(),"Date")]/following-sibling::text()')
        if scenedate:
            item['date'] = self.parse_date(scenedate.get()).isoformat()
        else:
            item['date'] = self.parse_date('today').isoformat()

        performers = scene.xpath(
            './div[@class="item-info"]//a[contains(@href,"/models/") or contains(@href,"sets.php")]/text()'
        ).getall()
        item['performers'] = [
            string.capwords(name.strip()) for name in performers
        ]

        image = scene.xpath('.//span[@class="left"]/a/img/@src0_1x').get()
        if image:
            # Collapse doubled slashes and drop the "#id#" placeholder
            # before prefixing the site host.
            image = image.replace('//', '/').strip()
            image = image.replace('#id#', '').strip()
            item['image'] = "https://www.archangelvideo.com" + image
        else:
            item['image'] = None
        item['image_blob'] = None
        item['trailer'] = ''

        url = scene.xpath('.//span[@class="left"]/a/@href').get()
        # Default to an empty id so a missing or oddly shaped URL skips
        # the scene instead of raising on an unset field or failed match.
        external_id = ''
        if url:
            item['url'] = url.strip()
            id_match = re.search(r'.*/(.*).html', url)
            if id_match and id_match.group(1):
                external_id = id_match.group(1).strip().lower()
        else:
            item['url'] = ''
        item['id'] = external_id

        item['description'] = ''
        item['tags'] = []

        if not (item['title'] and item['id']):
            continue

        # Dates are compared as strings; both sides start with YYYY-MM-DD.
        if self.debug:
            if item['date'] <= filterdate:
                item['filtered'] = "Scene filtered due to date restraint"
            print(item)
        elif item['date'] > filterdate:
            scenelist.append(item)

    return scenelist
def parse_scenepage(self, response):
    """Extract Fetish Network scenes from a site listing page.

    Pages whose URL contains ``brutaldungeon`` use a different markup from
    the other sites, so the scene, title, date and description selectors
    are chosen per URL. Performer names are guessed from the title for a
    few known series. The id is a numeric code taken from the image file
    name when present, otherwise the title with spaces replaced by hyphens.

    Args:
        response: Response supporting ``xpath()`` and exposing ``url``.

    Returns:
        list: One item per scene for which an id could be determined.
    """
    page_url = response.url
    is_dungeon = "brutaldungeon" in page_url
    title_is_performer = ("brutalpickups" in page_url
                          or "bdsmprison" in page_url)

    if is_dungeon:
        scenes = response.xpath(
            '//div[contains(@class,"download-box-large")]/label/..')
        title_xpath = './/h1/text()'
        date_xpath = './/span[contains(@class,"date")]/text()'
        description_xpath = './/p/text()'
    else:
        scenes = response.xpath(
            '//div[@class="row"]/div[contains(@class,"content-image-video")]'
        )
        title_xpath = './/div[contains(@class,"vedio-text-area")]/h4/text()'
        date_xpath = './/div[contains(@class,"image-text-area")]/h4/text()'
        description_xpath = './/h5/text()'

    # Tried in order; the first selector yielding a value wins.
    image_xpaths = (
        './/video/@poster',
        './/div[contains(@class,"image-section-blk")]/a/img/@src',
        './/div[contains(@class,"image-section-blk")]/a/img/@data-src',
        './/label[@class="player"]//video/@poster',
        './/label[@class="player"]/a/img/@src',
    )

    scenelist = []
    for scene in scenes:
        item = SceneItem()
        item['performers'] = []
        item['title'] = ''
        item['id'] = ''

        title = scene.xpath(title_xpath).get()
        if title:
            title = title.strip()
            if "Latina Patrol" in title:
                performers = title.replace("Latina Patrol", "")
                # Accept the remainder as a name only if it is 2-3 words.
                if len(performers.split()) in (2, 3):
                    item['performers'] = [performers.strip()]
            if "Teens In The Woods" in title:
                performers = title.replace("Teens In The Woods", "")
                performers = performers.replace("-", "")
                performers = performers.replace("&", ",")
                for candidate in performers.strip().split(","):
                    if len(candidate.split()) in (2, 3):
                        item['performers'].append(candidate.strip())

        # On these sites the title itself is used as the performer. Guarded
        # so a missing title cannot produce a [None] performer list.
        if title and title_is_performer:
            item['performers'] = [title]
        if is_dungeon:
            item['performers'] = []

        if title:
            item['title'] = title

        # Named scenedate so it does not shadow a module-level ``date``.
        scenedate = scene.xpath(date_xpath).get()
        if scenedate:
            item['date'] = self.parse_date(scenedate.strip()).isoformat()
        else:
            item['date'] = self.parse_date('today').isoformat()

        description = scene.xpath(description_xpath).get()
        if description:
            item['description'] = description.replace(
                "Description: ", "").strip()
        else:
            item['description'] = ''

        image = None
        for image_xpath in image_xpaths:
            image = scene.xpath(image_xpath).get()
            if image:
                break
        if image:
            # Found image paths are appended to the page URL up to and
            # including "/t2/".
            # NOTE(review): assumes every page URL contains "/t2/"; a URL
            # without it still raises here, as before. Confirm.
            baseurl = re.search(r'(.*\/t2\/)', page_url).group(1)
            image = baseurl + image.strip()
        else:
            image = None
        item['image'] = image
        item['image_blob'] = None

        # Only search for the id code when an image exists; re.search
        # raises TypeError on None.
        idcode = ''
        if image:
            code_match = re.search(r'p\d{3,4}_s\d{3,4}_(\d{3,4})_', image)
            if code_match:
                idcode = code_match.group(1)

        if idcode:
            item['id'] = idcode.strip()
        elif item['title']:
            item['id'] = item['title'].replace(" ", "-")

        item['url'] = page_url
        item['tags'] = []
        item['trailer'] = ''
        item['parent'] = "Fetish Network"
        item['network'] = "Fetish Network"
        sitename = tldextract.extract(page_url).domain
        item['site'] = match_site(sitename)

        if item['id']:
            scenelist.append(item)

    return scenelist
def get_scenes(self, response):
    """Extract Aletta Ocean Live scenes from a listing page.

    Every ``<div class="movie-set-list-item">`` becomes one ``SceneItem``.
    The image URL is read from the ``url(...)`` value in the element's
    inline style, and the id from the scene link (the part between the
    last ``/`` and ``.html``). Scenes without a usable id are dropped.

    Args:
        response: Response supporting ``xpath()``.

    Returns:
        list: Items with title and id whose ISO date string sorts after
        the cutoff derived from ``self.days``. When ``self.debug`` is
        truthy nothing is collected; qualifying items are printed instead
        (tagged with ``filtered`` when they would have been dropped).
    """
    # The cutoff is identical for every scene, so compute it once.
    days = int(self.days)
    if days > 27375:
        # Sorts before any real date string, which disables the filter.
        filterdate = "0000-00-00"
    else:
        filterdate = (date.today() - timedelta(days)).strftime('%Y-%m-%d')

    scenelist = []
    for scene in response.xpath('//div[@class="movie-set-list-item"]'):
        item = SceneItem()
        item['site'] = "Aletta Ocean Live"
        item['parent'] = "Aletta Ocean Live"
        item['network'] = "Aletta Ocean"

        title = scene.xpath('.//div[contains(@class,"title")]/text()')
        if title:
            item['title'] = self.cleanup_title(title.get())
        else:
            item['title'] = 'No Title Available'

        scenedate = scene.xpath('.//div[contains(@class,"date")]/text()')
        if scenedate:
            item['date'] = self.parse_date(
                scenedate.get().strip()).isoformat()
        else:
            item['date'] = self.parse_date('today').isoformat()

        item['performers'] = ['Aletta Ocean']

        # A missing style attribute, or one without a .jpg URL, yields an
        # empty image instead of crashing on a failed match.
        style = scene.xpath('./@style').get()
        image_match = re.search(r'url\((.*.jpg)', style) if style else None
        item['image'] = image_match.group(1).strip() if image_match else ''
        item['image_blob'] = ''
        item['trailer'] = ''

        url = scene.xpath('./a/@href').get()
        # Default to an empty id so a missing or oddly shaped URL skips
        # the scene instead of raising on an unset field or failed match.
        external_id = ''
        if url:
            item['url'] = url.strip()
            id_match = re.search(r'.*/(.*).html', url)
            if id_match and id_match.group(1):
                external_id = id_match.group(1).strip().lower()
        else:
            item['url'] = ''
        item['id'] = external_id

        item['description'] = ''
        item['tags'] = []

        if not (item['title'] and item['id']):
            continue

        # Dates are compared as strings; both sides start with YYYY-MM-DD.
        if self.debug:
            if item['date'] <= filterdate:
                item['filtered'] = "Scene filtered due to date restraint"
            print(item)
        elif item['date'] > filterdate:
            scenelist.append(item)

    return scenelist