def update(metadata, siteID, movieGenres, movieActors):
    """Fill ``metadata`` for one scene by scraping its details page.

    The page URL is rebuilt from the first ``|``-separated field of
    ``metadata.id`` by mapping ``_`` back to ``/`` and ``!`` back to ``?``.

    Args:
        metadata: metadata object to populate (studio, title, summary,
            tagline, collections, dates, directors, posters and art).
        siteID: site index passed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector; cleared and refilled here.
        movieActors: actor collector; cleared and refilled here.

    Returns:
        The ``metadata`` object that was passed in.

    Raises:
        IndexError: if the title, summary or release-date node is missing
            from the page (those lookups are not guarded).
    """
    Log('******UPDATE CALLED*******')

    url = str(metadata.id).split("|")[0].replace('_', '/').replace('!', '?')
    detailsPageElements = HTML.ElementFromURL(url)
    art = []
    metadata.collections.clear()
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Studio
    metadata.studio = 'Studio Name'

    # Title
    metadata.title = detailsPageElements.xpath(
        '//div[@class="title"]')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="summary"]/p')[0].text_content().strip()

    # Tagline and Collection(s)
    tagline = PAsearchSites.getSearchSiteName(siteID).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    for genreLink in detailsPageElements.xpath('//span[@class="update_tags"]/a'):
        genreName = genreLink.text_content().strip().lower()
        movieGenres.addGenre(genreName)

    # Release Date
    date = detailsPageElements.xpath(
        '//div[@class="cell update_date"]')[0].text_content().strip()
    if date:
        date_object = datetime.strptime(date, '%B %d, %Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    for actorLink in detailsPageElements.xpath('//span[@class="update_models"]/a'):
        # No str() conversion: under Python 2 it fails on non-ASCII names.
        actorName = actorLink.text_content().strip()
        try:
            actorPageURL = actorLink.get("href")
            actorPage = HTML.ElementFromURL(actorPageURL)
            actorPhotoURL = actorPage.xpath(
                '//img[@class="model_bio_thumb"]')[0].get("src")
            if 'http' not in actorPhotoURL:
                actorPhotoURL = PAsearchSites.getSearchBaseURL(siteID) + actorPhotoURL
        except Exception as e:
            # The photo is optional; the actor is still added without it.
            Log('No photo for actor %s: %s' % (actorName, e))
            actorPhotoURL = ""
        movieActors.addActor(actorName, actorPhotoURL)

    # Director
    try:
        # Start from a clean set and create one record per director link, so
        # that several directors are all kept and no empty record is left
        # behind when the page lists none.
        metadata.directors.clear()
        for dirname in detailsPageElements.xpath('//p[@class="director"]/a'):
            directorName = dirname.text_content().strip()
            if directorName:
                director = metadata.directors.new()
                director.name = directorName
    except Exception as e:
        Log('Could not read directors: %s' % e)

    # Posters and artwork

    # Video trailer background image
    try:
        twitterBG = detailsPageElements.xpath(
            '//meta[@name="twitter:image"]')[0].get('content')
        if twitterBG:
            art.append(twitterBG)
    except Exception as e:
        Log('No twitter:image artwork: %s' % e)

    # Photos
    for photoLink in detailsPageElements.xpath(
            '//img[contains(@class, "update_thumbs")]'):
        photo = photoLink.get('poster')
        # Elements without a "poster" attribute yield None; skip them.
        if photo:
            art.append(photo)

    # Scene photos page
    try:
        photoPageUrl = PAsearchSites.getSearchBaseURL(siteID) + detailsPageElements.xpath(
            '//a[@class="photo_page"]')[0].get('href')
        photoPage = HTML.ElementFromURL(photoPageUrl)
        for unlockedPhoto in photoPage.xpath('//a[@class="imgLink"]'):
            photoHref = unlockedPhoto.get('href')
            if 'http' not in photoHref:
                photoHref = PAsearchSites.getSearchBaseURL(siteID) + photoHref
            art.append(photoHref)
    except Exception as e:
        Log('No scene photo page artwork: %s' % e)

    requestHeaders = {'Referer': 'http://www.google.com'}
    # j is the sort order; it only advances for images that were processed.
    j = 1
    Log("Artwork found: " + str(len(art)))
    for posterUrl in art:
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            img_file = urllib.urlopen(posterUrl)
            im = StringIO(img_file.read())
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Preview(
                    HTTP.Request(posterUrl, headers=requestHeaders).content,
                    sort_order=j)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Preview(
                    HTTP.Request(posterUrl, headers=requestHeaders).content,
                    sort_order=j)
            j = j + 1
        except Exception as e:
            Log('Skipping artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Scrape a scene page and copy its details onto ``metadata``.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``; a relative URL is prefixed with the site's base URL.
    Title, summary, studio, tagline/collection, release date, genres,
    actors (each with a photo taken from the actor's own page) and poster
    images are written, in that order.

    Returns:
        The ``metadata`` object that was passed in.
    """
    idParts = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(idParts[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    response = PAutils.HTTPRequest(sceneURL)
    page = HTML.ElementFromString(response.text)

    # Title
    titleNode = page.xpath('//div[@class="single-part-details"]//h2')[0]
    metadata.title = titleNode.text_content().strip()

    # Summary
    summaryNode = page.xpath('//div[(contains(@class, "video-bottom-txt"))]')[0]
    metadata.summary = summaryNode.text_content().strip()

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date
    rawDate = page.xpath(
        '//div[@class="video-info-left-icon"]//span[3]/text()')[0].strip()
    if rawDate:
        released = datetime.strptime(rawDate, '%d%b,%Y')
        metadata.originally_available_at = released
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for tag in page.xpath('//div[(contains(@class, "video-tag-section"))]/a'):
        movieGenres.addGenre(tag.text_content().strip())

    # Actors
    movieActors.clearActors()
    actorLinks = page.xpath(
        '//div[(contains(@class, "video-info-left pull-left"))]/h3/span/a')
    for link in actorLinks:
        performer = link.text_content().strip()
        profileURL = link.get('href')
        response = PAutils.HTTPRequest(profileURL)
        profilePage = HTML.ElementFromString(response.text)
        photoURL = profilePage.xpath(
            '//div[(contains(@class, "single-p**n-pic"))]/img/@src')[0]
        movieActors.addActor(performer, photoURL)

    # Posters/Background: the query string is dropped from each image link.
    art = []
    for expression in ['//div[(contains(@class, "sub-video"))]/a/@href']:
        art.extend(href.split('?')[0] for href in page.xpath(expression))

    Log('Artwork found: %d' % len(art))
    for position, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Every readable image is stored as a poster.
            if width > 1:
                metadata.posters[posterUrl] = Proxy.Media(
                    image.content, sort_order=position)
            # The first image is never stored as background art.
            if width > 100 and position > 1:
                metadata.art[posterUrl] = Proxy.Media(
                    image.content, sort_order=position)
        except:
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Scrape a scene page and copy its details onto ``metadata``.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``; a relative URL is prefixed with the site's base URL.
    Site numbers 976 through 978 get the studio name ``'ExploitedX'``
    instead of the site name.

    Returns:
        The ``metadata`` object that was passed in.
    """
    idParts = metadata.id.split('|')
    sceneURL = PAutils.Decode(idParts[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    response = PAutils.HTTPRequest(sceneURL)
    page = HTML.ElementFromString(response.text)

    # Title
    titleNode = page.xpath('//h2[@class="section-title"]')[0]
    metadata.title = titleNode.text_content().strip()

    # Summary: second info block, minus its leading "Description:" label
    summaryText = page.xpath('//div[@class="update-info-block"]')[1].text_content()
    metadata.summary = summaryText.replace('Description:', '', 1).strip()

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)
    if siteNum >= 976 and siteNum <= 978:
        metadata.studio = 'ExploitedX'

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date: first info row, minus its leading "Released:" label
    rowText = page.xpath('//div[@class="update-info-row"]')[0].text_content()
    rawDate = rowText.replace('Released:', '', 1).strip()
    if rawDate:
        released = parse(rawDate)
        metadata.originally_available_at = released
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for tag in page.xpath('//ul[@class="tags"]//li//a'):
        movieGenres.addGenre(tag.text_content().strip())

    # Actors
    movieActors.clearActors()
    for entry in page.xpath('//div[contains(@class, "models-list-thumbs")]//li'):
        performer = entry.xpath('.//span/text()')[0]
        photoURL = entry.xpath('.//img//@src0_3x')[0]
        if not photoURL.startswith('http'):
            photoURL = PAsearchSites.getSearchBaseURL(siteNum) + photoURL
        movieActors.addActor(performer, photoURL)

    # Posters
    art = []
    for expression in ['//div[@class="player-thumb"]//img/@src0_1x']:
        for source in page.xpath(expression):
            if source.startswith('http'):
                art.append(source)
            else:
                art.append(PAsearchSites.getSearchBaseURL(siteNum) + source)

    Log('Artwork found: %d' % len(art))
    for position, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Every readable image is stored as a poster.
            if width > 1:
                metadata.posters[posterUrl] = Proxy.Media(
                    image.content, sort_order=position)
            # Images wider than 100px are also stored as background art.
            if width > 100:
                metadata.art[posterUrl] = Proxy.Media(
                    image.content, sort_order=position)
        except:
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Scrape a scene page and copy its details onto ``metadata``.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``; the third field, when present, is a fallback release
    date. The release date itself is looked up on the page of the last
    listed actor, in the link whose href contains the scene's URL segment.

    Args:
        metadata: metadata object to populate.
        lang: unused here.
        siteNum: site index passed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector; cleared and refilled here.
        movieActors: actor collector; cleared and refilled here.
        art: list of image URLs; poster URLs found on the page are
            appended to it (the caller's list is mutated).

    Returns:
        The ``metadata`` object that was passed in.

    Raises:
        IndexError: if the page has no ``<h1>`` title.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2] if len(metadata_id) > 2 else ''
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title
    metadata.title = detailsPageElements.xpath('//h1')[0].text_content()

    # Summary
    try:
        metadata.summary = detailsPageElements.xpath(
            '//div[h2]')[0].text_content().replace(
                'Read More ...Read Less', '').strip()
    except Exception as e:
        Log('No summary found: %s' % e)

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Actors
    actors = detailsPageElements.xpath('//a[contains(@title, "Model Bio")]')
    # Stays None when no actor page was fetched, so the release-date lookup
    # below can fall back to sceneDate instead of raising NameError.
    actorsPageElements = None
    # NOTE(review): these thresholds (2/3/>4) are one lower than the 3/4/>4
    # used by other scrapers and leave a count of exactly 4 unlabelled --
    # possibly intentional if one performer is never listed; confirm.
    if len(actors) == 2:
        movieGenres.addGenre('Threesome')
    if len(actors) == 3:
        movieGenres.addGenre('Foursome')
    if len(actors) > 4:
        movieGenres.addGenre('Orgy')

    for actorLink in actors:
        actorName = actorLink.text_content()
        actorPhotoURL = ''

        modelURL = actorLink.xpath('./@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        actorsPageElements = HTML.ElementFromString(req.text)

        images = actorsPageElements.xpath(
            '//div[@class="model-contr-colone"]//@src')
        if images and images[0]:
            actorPhotoURL = images[0]
            if 'http' not in actorPhotoURL:
                actorPhotoURL = PAsearchSites.getSearchBaseURL(siteNum) + actorPhotoURL

        movieActors.addActor(actorName, actorPhotoURL)

    # Release Date
    date = ''
    titleMatch = re.search(r"(?<=content\/).*(?=\/)", sceneURL)
    if titleMatch and actorsPageElements is not None:
        dateNodes = actorsPageElements.xpath(
            '//a[contains(@href, "' + titleMatch.group(0)
            + '")]//div[@class="fsdate absolute"]')
        if dateNodes:
            date = dateNodes[0].text_content().strip()
    if not date and sceneDate:
        date = sceneDate
    if date:
        # Round-trip through a day-month-year string so only the calendar
        # date is kept.
        date = parse(date).strftime('%d-%m-%Y')
        date_object = datetime.strptime(date, '%d-%m-%Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    categoryNodes = detailsPageElements.xpath('//div[@class="amp-category"]')
    if categoryNodes:
        for genreLink in categoryNodes[0].text_content().split('\n'):
            genreName = genreLink.strip()
            # Splitting on newlines leaves blank fragments; skip them.
            if genreName:
                movieGenres.addGenre(genreName)

    # Posters
    xpaths = [
        '//div[@class="amp-vis-mobile"]//@src',
    ]
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(
                    image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(
                    image.content, sort_order=idx)
        except Exception as e:
            Log('Skipping artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Populate ``metadata`` from the scene record returned by the site API.

    The scene id is the first ``|``-separated field of ``metadata.id``. A
    token is obtained with ``getToken`` and the scene record is fetched with
    ``getDatafromAPI``; studio, title, summary, tagline/collection, genres,
    release date, performers, director and artwork are read from that record.

    Returns:
        The ``metadata`` object that was passed in.
    """
    Log('******UPDATE CALLED*******')

    idParts = str(metadata.id).split('|')
    sceneId = idParts[0]
    token = getToken(PAsearchSites.getSearchBaseURL(siteID))
    scene = getDatafromAPI(PAsearchSites.getSearchBaseURL(siteID), sceneId, token)

    # Studio: the producer's first and last name
    producer = scene['producer']
    metadata.studio = '%s %s' % (producer['name'], producer['last_name'])

    # Title
    metadata.title = scene['title']

    # Summary
    metadata.summary = scene['synopsis_clean']

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteID).strip()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Genres
    movieGenres.clearGenres()
    for tag in scene['tags']:
        movieGenres.addGenre(tag['title'])

    # Release Date
    released = parse(scene['release_date'])
    metadata.originally_available_at = released
    metadata.year = metadata.originally_available_at.year

    # Actors: photo URLs are stored without their query string
    movieActors.clearActors()
    for performer in scene['performers']:
        fullName = '%s %s' % (performer['name'], performer['last_name'])
        photoURL = performer['poster_image'].split('?', 1)[0]
        movieActors.addActor(fullName, photoURL)

    # Director
    director = metadata.directors.new()
    directorInfo = scene['director']
    director.name = '%s %s' % (directorInfo['name'], directorInfo['last_name'])

    # Poster: the scene poster first, then every album picture
    art = [scene['poster_picture'].split('?', 1)[0]]
    art.extend(picture['path'].split('?', 1)[0] for picture in scene['album'])

    Log('Artwork found: %d' % len(art))
    for position, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            download = urllib.urlopen(posterUrl)
            width, height = Image.open(StringIO(download.read())).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(
                    HTTP.Request(
                        posterUrl,
                        headers={'Referer': 'http://www.google.com'}).content,
                    sort_order=position)
            if width > 100 and width > height and position > 1:
                # Item is an art item (never the first image)
                metadata.art[posterUrl] = Proxy.Media(
                    HTTP.Request(
                        posterUrl,
                        headers={'Referer': 'http://www.google.com'}).content,
                    sort_order=position)
        except:
            pass

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Populate ``metadata`` for an X-Art scene.

    The page path is the first ``|``-separated field of ``metadata.id`` with
    ``+`` mapped back to ``/``, appended to the site's base URL. Besides the
    page's own background and poster, extra images are requested from a list
    of fan sites through ``PAextras.getFanArt``.

    Args:
        metadata: metadata object to populate.
        siteID: site index passed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector; cleared and refilled here.
        movieActors: actor collector; cleared and refilled here.

    Returns:
        The ``metadata`` object that was passed in.

    Raises:
        IndexError: if the title, the third ``<h2>`` (date), an actor photo
            or the background image is missing from the page.
        ValueError: if the date text does not match ``'%b %d, %Y'``.
    """
    import random

    Log('******UPDATE CALLED*******')
    temp = str(metadata.id).split("|")[0].replace('+', '/')

    url = PAsearchSites.getSearchBaseURL(siteID) + temp
    detailsPageElements = HTML.ElementFromURL(url)

    # Studio
    metadata.studio = "X-Art"

    # Summary: all info paragraphs, separated by blank lines
    paragraphs = detailsPageElements.xpath(
        '//div[@class="small-12 medium-12 large-12 columns info"]//p')
    metadata.summary = '\n\n'.join(
        paragraph.text_content() for paragraph in paragraphs).strip()

    # Title
    metadata.title = detailsPageElements.xpath(
        '//div[@class="row info"]//div[@class="small-12 medium-12 large-12 columns"]'
    )[0].text_content()

    # Release Date: text of the third <h2>, without its last character
    date = detailsPageElements.xpath('//h2')[2].text_content()[:-1]
    date_object = datetime.strptime(date, '%b %d, %Y')
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Tagline and Collection(s)
    # Cleared first so a metadata refresh does not accumulate collections.
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteID).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    # No source for genres on the page, so they are added manually.
    movieGenres.addGenre("Artistic")
    movieGenres.addGenre("Glamcore")

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//h2//a')
    # Predefined because the fan-art lookup below receives the last actor
    # name even when the page lists no actors.
    actorName = ''
    if len(actors) == 3:
        movieGenres.addGenre("Threesome")
    if len(actors) == 4:
        movieGenres.addGenre("Foursome")
    if len(actors) > 4:
        movieGenres.addGenre("Orgy")
    for actorLink in actors:
        actorName = actorLink.text_content()
        actorPageURL = actorLink.get("href")
        actorPage = HTML.ElementFromURL(actorPageURL)
        actorPhotoURL = actorPage.xpath('//img[@class="info-img"]')[0].get("src")
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters/Background
    # Validating against an empty key list, before new images are added.
    valid_names = list()
    metadata.posters.validate_keys(valid_names)
    metadata.art.validate_keys(valid_names)

    thumbs = []
    try:
        for posterURL in detailsPageElements.xpath(
                '//div[@class="gallery-item"]//img'):
            thumbs.append((posterURL.get('src')).replace(" ", "_"))
    except Exception:
        Log("No Thumbnails found")

    background = detailsPageElements.xpath(
        '//img[contains(@src,"/videos")]')[0].get("src")
    metadata.art[background] = Proxy.Preview(
        HTTP.Request(background).content, sort_order=1)

    if thumbs:
        # Swap the last five characters of the first thumbnail for "2.jpg".
        posterURL = str(thumbs[0])[:-5] + "2.jpg"
    else:
        posterURL = background.replace("1.jpg", "2.jpg").replace(
            "1-lrg.jpg", "2-lrg.jpg")
    metadata.posters[posterURL] = Proxy.Preview(
        HTTP.Request(posterURL).content, sort_order=1)

    # Extra Posters
    art = []
    match = 0

    for site in [
            "XartFan.com", "HQSluts.com", "ImagePost.com",
            "XartBeauties.com/galleries"
    ]:
        fanSite = PAextras.getFanArt(site, art, actors, actorName,
                                     metadata.title, match)
        match = fanSite[2]
        # Compare by value: identity (`is`) on ints is an implementation
        # detail and must not be relied on.
        if match == 1:
            break

    if match in (1, 2):
        # Keep the first four images, the last one and a random selection of
        # four more. Change the sample size below for more or fewer posters,
        # or refresh metadata to get a different sample.
        try:
            sample = [art[0], art[1], art[2], art[3], art[-1]
                      ] + random.sample(art, 4)
            art = sample
            Log("Selecting subset of " + str(len(art)) + " images from the set.")
        except (IndexError, ValueError):
            # Fewer than four images: keep the whole list.
            pass

    requestHeaders = {'Referer': 'http://www.google.com'}
    try:
        # j is the sort order; it only advances for images that were processed.
        j = 1
        for posterUrl in art:
            Log("Trying next Image")
            Log(posterUrl)
            if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
                continue

            # Download image file for analysis
            try:
                img_file = urllib.urlopen(posterUrl)
                im = StringIO(img_file.read())
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1 or height > width:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Preview(
                        HTTP.Request(posterUrl, headers=requestHeaders).content,
                        sort_order=j)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Preview(
                        HTTP.Request(posterUrl, headers=requestHeaders).content,
                        sort_order=j)
                j = j + 1
            except Exception:
                # Size analysis failed; store the image as art anyway.
                Log("there was an issue")
                metadata.art[posterUrl] = Proxy.Preview(
                    HTTP.Request(posterUrl, headers=requestHeaders).content,
                    sort_order=j)
    except Exception as e:
        # The fallback download itself failed; give up on remaining fan art.
        Log('Stopped processing fan art: %s' % e)

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Scrape a scene page and copy its details onto ``metadata``.

    ``metadata.id`` is split on ``|``: the first field is the encoded scene
    URL, the third (when present) the release date and the fourth (when
    present) the poster URL.

    Args:
        metadata: metadata object to populate.
        siteID: site index passed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector; cleared and refilled here.
        movieActors: actor collector; cleared and refilled here.

    Returns:
        The ``metadata`` object that was passed in.

    Raises:
        IndexError: if the title, summary or an actor's name/photo is
            missing from the scraped pages.
    """
    Log('******UPDATE CALLED*******')

    metadata_id = str(metadata.id).split('|')
    url = PAutils.Decode(metadata_id[0])
    # Both optional fields are guarded the same way.
    sceneDate = metadata_id[2] if len(metadata_id) > 2 else None
    poster = metadata_id[3] if len(metadata_id) > 3 else None
    detailsPageElements = HTML.ElementFromURL(url)

    # Studio
    metadata.studio = 'Full P**n Network'

    # Title
    metadata.title = detailsPageElements.xpath('//h4')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//p[@class="hide-for-small-only"]')[0].text_content().strip()

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Collection(s): both the studio and the site name
    metadata.collections.clear()
    for seriesName in [metadata.studio, PAsearchSites.getSearchSiteName(siteID)]:
        metadata.collections.add(seriesName)

    # Genres
    movieGenres.clearGenres()
    genres = detailsPageElements.xpath(
        '//div[@class="small-12"]//a[contains(@href, "/category/")]/text()')
    for genreLink in genres:
        genreName = genreLink.strip()
        movieGenres.addGenre(genreName)

    # Actors: name and photo come from each actor's own page
    movieActors.clearActors()
    actors = detailsPageElements.xpath(
        '//div[@class="small-12"]//a[contains(@href, "/model/")]/@href')
    for actorLink in actors:
        actorPage = HTML.ElementFromURL(
            PAsearchSites.getSearchBaseURL(siteID) + actorLink)
        actorName = actorPage.xpath('//h1')[0].text_content().strip()
        actorPhotoURL = actorPage.xpath('//img[@alt="%s"]/@src' % actorName)[0]
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    # Only a real poster URL is queued; a missing one used to put None in
    # the list, which was then counted and passed to the download code.
    art = [poster] if poster else []

    requestHeaders = {'Referer': 'http://www.google.com'}
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            img_file = urllib.urlopen(posterUrl)
            im = StringIO(img_file.read())
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(
                    HTTP.Request(posterUrl, headers=requestHeaders).content,
                    sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(
                    HTTP.Request(posterUrl, headers=requestHeaders).content,
                    sort_order=idx)
        except Exception as e:
            Log('Skipping artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a scene page and copy its details onto ``metadata``.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``; a relative URL is prefixed with the site's base URL.
    The page only carries a relative age text, so the release date is
    estimated by subtracting that age from the current time.

    Args:
        metadata: metadata object to populate.
        siteNum: site index passed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector; cleared and refilled here.
        movieActors: actor collector; cleared and refilled here.

    Returns:
        The ``metadata`` object that was passed in.

    Raises:
        IndexError: if the title, summary or "added" node is missing.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@id="content-more-less"]/p')[0].text_content().strip()

    # Studio
    metadata.studio = 'CumLouder'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//ul[@class="tags"]/li/a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Release Date - no actual date available, guessing (better than nothing)
    date = detailsPageElements.xpath('//div[@class="added"]')[0].text_content().strip()
    dateWords = date.split(' ')
    date_object = None
    try:
        # The second word is the amount and the third the unit.
        timenumber = int(dateWords[1])
        timeframe = dateWords[2]
    except (IndexError, ValueError):
        Log('Unrecognised release age text: %s' % date)
        timeframe = ''

    if timeframe:
        today = datetime.now()
        # Accept singular and plural unit names alike.
        unit = timeframe[:-1] if timeframe.endswith('s') else timeframe
        if unit in ('second', 'minute'):
            # Anything younger than an hour counts as today.
            date_object = today
        elif unit in ('hour', 'day', 'week', 'month', 'year'):
            date_object = today - relativedelta(**{unit + 's': timenumber})
        else:
            # Previously an unknown unit left date_object unbound and
            # aborted the update with a NameError.
            Log('Unrecognised release age unit: %s' % timeframe)

    if date_object is not None:
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//a[@class="pornstar-link"]')
    if len(actors) == 3:
        movieGenres.addGenre('Threesome')
    if len(actors) == 4:
        movieGenres.addGenre('Foursome')
    if len(actors) > 4:
        movieGenres.addGenre('Orgy')

    for actorLink in actors:
        actorName = actorLink.text_content().strip()
        # No actor photo is looked up for this site.
        actorPhotoURL = ''
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//div[@class="box-video box-video-html5"]/video/@lazy'
    ]
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(
                posterUrl, headers={'Referer': 'http://www.google.com'})
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(
                    image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(
                    image.content, sort_order=idx)
        except Exception as e:
            Log('Skipping artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a scene page and copy its details onto ``metadata``.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``; a relative URL is prefixed with the site's search URL.
    The page is requested with a ``cLegalAge=true`` cookie. Page tags are
    all added as genres; tags whose lower-cased text is in the built-in
    name list are additionally added as actors.

    Returns:
        The ``metadata`` object that was passed in.
    """
    idParts = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(idParts[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchSearchURL(siteNum) + sceneURL
    response = PAutils.HTTPRequest(sceneURL, headers={'Cookie': 'cLegalAge=true'})
    page = HTML.ElementFromString(response.text)

    # Title (title-cased)
    filmName = page.xpath('//span[@class="contentFilmName"]')[0].text_content()
    titleCased = filmName.strip().title()
    metadata.title = titleCased.title()

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date: the part of the date line before the first bullet
    dateLine = page.xpath('//span[@class="contentFileDate"]')[0].text_content()
    rawDate = dateLine.strip().split(' • ')[0]
    released = datetime.strptime(rawDate, '%Y %B %d')
    metadata.originally_available_at = released
    metadata.year = metadata.originally_available_at.year

    # Summary
    try:
        blurb = page.xpath('//div[@class="contentPreviewDescription"]')[0]
        metadata.summary = blurb.text_content().strip()
    except:
        pass

    # Genres: two fixed defaults, then every tag on the page
    movieGenres.clearGenres()
    for defaultGenre in ['BDSM', 'S&M']:
        movieGenres.addGenre(defaultGenre)
    for tag in page.xpath('//div[@class="contentPreviewTags"]/a'):
        movieGenres.addGenre(tag.text_content().strip())

    # Actors
    movieActors.clearActors()
    siteActors = [
        'abby', 'briana', 'david', 'diamond', 'greta', 'hellia', 'hilda',
        'holly', 'jade', 'jeby', 'jessica', 'keya', 'lilith', 'luna', 'marc',
        'micha', 'misty', 'nastee', 'nazryana', 'pearl', 'qs', 'queensnake',
        'rachel', 'ruby', 'sharon', 'suzy', 'tanita', 'tracy', 'zara'
    ]
    for tag in page.xpath('//div[@class="contentPreviewTags"]/a'):
        if tag.text_content().strip().lower() not in siteActors:
            continue
        # No actor photo is looked up for this site.
        movieActors.addActor(tag.text_content().strip(), '')

    # Posters: preview images, query string removed, base URL prepended
    art = []
    previewSources = page.xpath(
        '//div[@class="contentBlock"]//img[contains(@src, "preview")]/@src')
    for source in previewSources:
        art.append(PAsearchSites.getSearchBaseURL(siteNum) + source.split('?')[0])

    Log('Artwork found: %d' % len(art))
    for position, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Every readable image is stored as a poster.
            if width > 1:
                metadata.posters[posterUrl] = Proxy.Media(
                    image.content, sort_order=position)
            # The first image is never stored as background art.
            if width > 100 and position > 1:
                metadata.art[posterUrl] = Proxy.Media(
                    image.content, sort_order=position)
        except:
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a scene page and copy its details onto ``metadata``.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``; a relative URL is prefixed with the site's base URL.
    The third field, when present, is used as the release date if the page
    itself does not provide one.

    Args:
        metadata: metadata object to populate.
        siteNum: site index passed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector; cleared and refilled here.
        movieActors: actor collector; cleared and refilled here.

    Returns:
        The ``metadata`` object that was passed in.

    Raises:
        IndexError: if the title heading is missing from the page.
        ValueError: if the page date does not match ``'%m/%d/%Y'``.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    sceneDate = metadata_id[2] if len(metadata_id) > 2 else ''
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title: the "Video -" / "Movie -" prefix is removed
    metadata.title = detailsPageElements.xpath(
        '//div[@class="content-desc content-new-scene"]//h1'
    )[0].text_content().replace('Video -', '').replace('Movie -', '').strip()

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Summary
    try:
        metadata.summary = detailsPageElements.xpath(
            '//div[@class="content-desc content-new-scene"]//p'
        )[0].text_content().strip()
    except Exception as e:
        Log('No summary found: %s' % e)

    # Genres
    movieGenres.clearGenres()
    for genre in detailsPageElements.xpath(
            '//ul[contains(@class,"scene-tags")]/li'):
        genreName = genre.xpath('.//a')[0].text_content().lower()
        movieGenres.addGenre(genreName)

    # Release Date
    dateNodes = detailsPageElements.xpath('//meta[@itemprop="uploadDate"]')
    date = dateNodes[0].get('content') if dateNodes else None
    date_object = None
    if date:
        date_object = datetime.strptime(date, '%m/%d/%Y')
    elif sceneDate:
        # Fall back to the date stored in the id. This branch used to parse
        # the (empty) page date instead, so the fallback could never work.
        date_object = parse(sceneDate)
    if date_object is not None:
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    for actorPage in detailsPageElements.xpath(
            '//ul[@id="featured_pornstars"]//div[@class="model"]'):
        actorName = actorPage.xpath('.//h3')[0].text_content().strip()
        actorPhotoURL = actorPage.xpath('.//img/@src')[0]
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: only the first trailer image; none if the page lacks it
    art = detailsPageElements.xpath(
        '//div[@id="trailer_player_finished"]//img/@src')[:1]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(
                    image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(
                    image.content, sort_order=idx)
        except Exception as e:
            Log('Skipping artwork %s: %s' % (posterUrl, e))

    return metadata
def search(results, lang, siteNum, search):
    """Append matching scenes to ``results``.

    Two sources are used: the site's search listing, and two direct scene
    URLs built from the encoded title (``video---<title>_vids.html`` and
    ``movie---<title>_vids.html``). Each result id has the form
    ``<encoded url>|<siteNum>|<YYYY-MM-DD>``.

    Args:
        results: result container; ``MetadataSearchResult`` items are
            appended to it.
        lang: language stored on every result.
        siteNum: site index passed to the ``PAsearchSites`` lookups.
        search: mapping read for the keys ``'encoded'``, ``'title'`` and
            ``'date'``.

    Returns:
        The ``results`` container that was passed in.
    """
    req = PAutils.HTTPRequest(
        PAsearchSites.getSearchSearchURL(siteNum) + search['encoded'])
    searchResults = HTML.ElementFromString(req.text)
    siteName = PAsearchSites.getSearchSiteName(siteNum)

    for searchResult in searchResults.xpath('//div[@class="scene"]'):
        url = searchResult.xpath('.//a[@data-track="TITLE_LINK"]/@href')[0]
        # Only links that point at scene pages are kept.
        if '/scenes/' not in url:
            continue

        curID = PAutils.Encode(url)
        titleNoFormatting = searchResult.xpath(
            './/a[@data-track="TITLE_LINK"]')[0].text_content()
        releaseDate = parse(
            searchResult.xpath('./span[@class="scene-date"]')[0]
            .text_content().strip()).strftime('%Y-%m-%d')

        # Score by date when one was searched for, otherwise by title.
        if search['date']:
            score = 100 - Util.LevenshteinDistance(search['date'], releaseDate)
        else:
            score = 100 - Util.LevenshteinDistance(
                search['title'].lower(), titleNoFormatting.lower())

        results.Append(
            MetadataSearchResult(
                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                name='%s [%s] %s' % (titleNoFormatting, siteName, releaseDate),
                score=score,
                lang=lang))

    # search for exact scene name
    urlTitle = search['encoded'].replace('%20', '-')
    baseURL = PAsearchSites.getSearchBaseURL(siteNum)
    urls = [
        baseURL + '/scenes/video---' + urlTitle + '_vids.html',
        baseURL + '/scenes/movie---' + urlTitle + '_vids.html',
    ]

    for url in urls:
        try:
            sceneReq = PAutils.HTTPRequest(url)
            scenePage = HTML.ElementFromString(sceneReq.text)

            curID = PAutils.Encode(url)
            titleNoFormatting = scenePage.xpath(
                '//div[@class="content-desc content-new-scene"]//h1'
            )[0].text_content().strip()
            # Formatted like the listing results above; the raw datetime
            # object used to leak into the id and the display name.
            releaseDate = parse(
                scenePage.xpath('//meta[@itemprop="uploadDate"]')[0].get(
                    'content')).strftime('%Y-%m-%d')

            # A page found at the guessed URL is treated as a full match.
            score = 100

            results.Append(
                MetadataSearchResult(
                    id='%s|%d|%s' % (curID, siteNum, releaseDate),
                    name='%s [%s] %s' % (titleNoFormatting, siteName, releaseDate),
                    score=score,
                    lang=lang))
        except Exception:
            # The guessed URL usually does not exist; that is expected.
            pass

    return results