def update(metadata, siteID, movieGenres, movieActors):
    """Populate Plex movie metadata by scraping the scene's details page.

    `metadata.id` is '<pathSegment>|...'; the first segment is appended to the
    site's search URL to build the details-page URL.  Mutates and returns
    `metadata`, and resets/refills `movieGenres` and `movieActors`.
    """
    temp = str(metadata.id).split("|")[0]
    Log('temp: ' + temp)
    url = PAsearchSites.getSearchSearchURL(siteID) + temp
    Log('url:' + url)
    detailsPageElements = HTML.ElementFromURL(url)

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteID)
    Log('Studio: ' + metadata.studio)

    # Summary
    metadata.summary = detailsPageElements.xpath('//p[@class="video-description"]')[0].text_content().strip()

    # Tagline and Collection — both are simply the studio/site name
    tagline = metadata.studio
    metadata.collections.clear()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    genres = detailsPageElements.xpath('//a[@class="video-tag"]')
    if len(genres) > 0:
        for genre in genres:
            movieGenres.addGenre(genre.text_content())

    # Actors — each actor link is followed to fetch a headshot
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//a[contains(@class,"video-actor-link")]')
    if len(actors) > 0:
        for actorLink in actors:
            actorName = actorLink.text_content()
            actorPageURL = PAsearchSites.getSearchBaseURL(siteID) + actorLink.get("href")
            actorPage = HTML.ElementFromURL(actorPageURL)
            # split('?') drops any query string so the photo URL is stable
            actorPhotoURL = actorPage.xpath('//img[@class="girl-details-photo"]')[0].get("src").split('?')
            movieActors.addActor(actorName, actorPhotoURL[0])

    # Posters/Background — clear existing images, then re-add from the gallery
    valid_names = list()
    metadata.posters.validate_keys(valid_names)
    metadata.art.validate_keys(valid_names)
    posters = detailsPageElements.xpath('//div[contains(@class,"gallery-item")]')
    posterNum = 1
    for posterCur in posters:
        posterURL = posterCur.get("data-big-image")
        # Referer header spoofed to avoid hotlink protection
        metadata.posters[posterURL] = Proxy.Preview(HTTP.Request(posterURL, headers={'Referer': 'http://www.google.com'}).content, sort_order=posterNum)
        posterNum = posterNum + 1
    backgroundURL = detailsPageElements.xpath('//img[@class="video-image"]')[0].get("src").split('?')
    metadata.art[backgroundURL[0]] = Proxy.Preview(HTTP.Request(backgroundURL[0], headers={'Referer': 'http://www.google.com'}).content, sort_order=1)

    # Date — text is 'Label: Month DD, YYYY'; keep the part after the colon
    date = detailsPageElements.xpath('.//div[@class="video-details"]//p[@class="video-upload-date"]')[0].text_content().split(":")
    dateFixed = date[1].strip()
    Log('DateFixed: ' + dateFixed)
    date_object = datetime.strptime(dateFixed, '%B %d, %Y')
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Title — prefixed with the studio name
    titleOfficial = detailsPageElements.xpath('//div[@class="video-rating-and-details"]//h1[@class="heading heading--2 video-title"]')[0].text_content()
    metadata.title = metadata.studio + " - " + titleOfficial
    Log('Title: ' + metadata.title)

    return metadata
def search(results, lang, siteNum, searchData):
    """Collect candidate scene URLs from the site search plus a Google search,
    then score each candidate page against the query title/date.

    Appends MetadataSearchResult entries to `results` and returns it.
    """
    searchResults = []
    url = PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded + '&sid=587'
    req = PAutils.HTTPRequest(url)
    siteSearchResults = HTML.ElementFromString(req.text)
    for searchResult in siteSearchResults.xpath('//div[@class="itemm"]'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + '/tour/%s' % searchResult.xpath('.//@href')[0]
        searchResults.append(sceneURL)

    # Supplement with Google results; extract the scene ID between '<digit>pp/' and the next '/'
    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for result in googleResults:
        pattern = re.search(r'(?<=\dpp\/).*(?=\/)', result)
        if pattern:
            sceneID = pattern.group(0)
            sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + '/t1/refstat.php?lid=%s&sid=584' % sceneID
            if ('content' in result) and sceneURL not in searchResults:
                searchResults.append(sceneURL)

    for sceneURL in searchResults:
        req = PAutils.HTTPRequest(sceneURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        # refstat.php redirects; only pages that land on a 'content' URL are real scenes
        if ('content' in req.url):
            titleNoFormatting = detailsPageElements.xpath('//h2[@class="vidtitle"]')[0].text_content().strip().replace('\"', '')
            curID = PAutils.Encode(sceneURL)
            date = detailsPageElements.xpath('//h3[@class="releases"]//preceding-sibling::text()')[0].strip()
            if date:
                releaseDate = parse(date).strftime('%Y-%m-%d')
            else:
                # No on-page date: fall back to the date supplied in the query, if any
                releaseDate = searchData.dateFormat() if searchData.date else ''
            displayDate = releaseDate if date else ''

            # Prefer date matching when both sides have a date; otherwise title similarity
            if searchData.date and displayDate:
                score = 100 - Util.LevenshteinDistance(searchData.date, releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

            results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum), displayDate), score=score, lang=lang))

    return results
def search(results, lang, siteNum, searchData):
    """Search the site for scenes and movies matching the query title.

    Appends to `results`:
      - one entry per matching scene,
      - one entry per matching movie (name suffixed ' - Full Movie'),
      - one entry per scene listed on each matching movie's page.
    Returns `results`.

    Fix: removed the duplicated assignment `curID = curID = ...` in the
    movie-scenes loop (redundant chained self-assignment).
    """
    # Scenes by name
    req = PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded)
    searchResults = HTML.ElementFromString(req.text)
    for searchResult in searchResults.xpath('//div[@class="scenes list"]/div[@class="items"]/div[@class="scene thumbnail "]'):
        titleNoFormatting = searchResult.xpath('.//div[@class="textual"]/a')[0].text_content().strip()
        curID = PAutils.Encode(searchResult.xpath('.//a[@class="title"]/@href')[0])
        score = 100 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

        results.Append(MetadataSearchResult(
            id='%s|%d' % (curID, siteNum),
            name='%s [%s]' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum)),
            score=score,
            lang=lang))

    # Movies by name
    for searchResult in searchResults.xpath('//div[@class="movies list"]/div[@class="items"]/a[@class="movie thumbnail"]'):
        titleNoFormatting = searchResult.xpath('./h2')[0].text_content().strip()
        movieLink = searchResult.xpath('./@href')[0]
        curID = PAutils.Encode(movieLink)
        score = 100 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

        results.Append(MetadataSearchResult(
            id='%s|%d' % (curID, siteNum),
            name='%s - Full Movie [%s]' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum)),
            score=score,
            lang=lang))

        # Also append all the scenes from matching movies
        req = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + movieLink)
        moviePageElements = HTML.ElementFromString(req.text)
        for movieScene in moviePageElements.xpath('//div[@class="scenes"]/div[@class="list"]/div[@class="scene thumbnail "]'):
            titleNoFormatting = movieScene.xpath('.//div[@class="textual"]/a')[0].text_content().strip()
            curID = PAutils.Encode(movieScene.xpath('.//a[@class="title"]/@href')[0])
            score = 100 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

            results.Append(MetadataSearchResult(
                id='%s|%d' % (curID, siteNum),
                name='%s [%s]' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum)),
                score=score,
                lang=lang))

    return results
def search(results, lang, siteNum, searchData):
    """Look a scene up directly when the query starts with a numeric post ID;
    otherwise run a full-text search over the site's post listing.

    Appends MetadataSearchResult entries to `results` and returns it.
    """
    leadingToken = searchData.title.split(' ', 1)[0]
    if unicode(leadingToken, 'UTF-8').isdigit():
        # Strip the ID from the title so later comparisons use the bare title
        searchData.title = searchData.title.replace(leadingToken, '', 1).strip()
    else:
        leadingToken = None

    if leadingToken:
        # Direct lookup of the details page for this post ID
        postURL = PAsearchSites.getSearchBaseURL(siteNum) + '/post/details/' + leadingToken
        page = HTML.ElementFromString(PAutils.HTTPRequest(postURL).text)
        postTitle = page.xpath('//div[contains(@class, "mediaHeader")]//span[contains(@class, "title")]')[0].text_content().strip()
        postStudio = page.xpath('//span[contains(@class, "type")]')[0].text_content().split('|')[0].strip()

        results.Append(MetadataSearchResult(
            id='%s|%d' % (PAutils.Encode(postURL), siteNum),
            name='%s [%s]' % (postTitle, postStudio),
            score=100,
            lang=lang))
    else:
        searchData.encoded = searchData.title.replace(' ', '+')
        listing = HTML.ElementFromString(PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded).text)
        for post in listing.xpath('//div[contains(@class, "post")]'):
            postTitle = post.xpath('.//span[contains(@class, "title")]')[0].text_content().strip()
            postURL = post.xpath('.//a[contains(@class, "media")]/@href')[0]
            postStudio = post.xpath('.//span[contains(@class, "source")]')[0].text_content().strip()
            coverToken = PAutils.Encode(post.xpath('.//a[contains(@class, "media")]//img[contains(@class, "image")]/@src')[0])
            postDate = searchData.dateFormat() if searchData.date else ''

            # Title-similarity base score, with a bonus when the post's source
            # studio is this very site
            matchScore = 90 - Util.LevenshteinDistance(searchData.title.lower(), postTitle.lower())
            if PAsearchSites.getSearchSiteName(siteNum).lower() == postStudio.lower():
                matchScore += 10

            results.Append(MetadataSearchResult(
                id='%s|%d|%s|%s' % (PAutils.Encode(postURL), siteNum, postDate, coverToken),
                name='%s [%s]' % (postTitle, postStudio),
                score=matchScore,
                lang=lang))

    return results
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Search by leading numeric scene ID (direct page fetch) or by title
    (site search listing).  Appends MetadataSearchResult entries to `results`
    and returns it.
    """
    sceneID = searchTitle.split(' ', 1)[0]
    if unicode(sceneID, 'UTF-8').isdigit():
        searchTitle = searchTitle.replace(sceneID, '', 1).strip()
    else:
        sceneID = None

    # Only use the direct-ID path when the query was *nothing but* the ID
    if sceneID and not searchTitle:
        # 'sst' cookie forces the English version of the site
        req = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + '/' + sceneID, cookies={'sst': 'ulang-en'})
        if req.ok:
            detailsPageElements = HTML.ElementFromString(req.text)
            titleNoFormatting = detailsPageElements.xpath('//h1[@class="detail__title"]')[0].text_content()
            curID = PAutils.Encode(PAsearchSites.getSearchBaseURL(siteNum) + '/' + sceneID)
            releaseDate = ''
            date = detailsPageElements.xpath('//span[@class="detail__date"]')[0].text_content().strip()
            if date:
                releaseDate = parse(date).strftime('%Y-%m-%d')
            # Direct ID match is authoritative
            score = 100
            results.Append(MetadataSearchResult(id='%s|%d' % (curID, siteNum), name='[%s] %s %s' % (PAsearchSites.getSearchSiteName(siteNum), titleNoFormatting, releaseDate), score=score, lang=lang))
    else:
        encodedTitle = searchTitle.replace(' ', '+')
        req = PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle, cookies={'sst': 'ulang-en'})
        searchResults = HTML.ElementFromString(req.text)
        for searchResult in searchResults.xpath('//ul[@class="cards-list"]//li'):
            titleNoFormatting = searchResult.xpath('.//div[@class="card__footer"]//div[@class="card__h"]/text()')[0]
            curID = PAutils.Encode(searchResult.xpath('.//a/@href')[0])
            releaseDate = parse(searchResult.xpath('.//div[@class="card__date"]')[0].text_content().strip()).strftime('%Y-%m-%d')
            # Score by date when provided, otherwise by title similarity
            if searchDate:
                score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
            results.Append(MetadataSearchResult(id='%s|%d' % (curID, siteNum), name='%s [%s] %s' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum), releaseDate), score=score, lang=lang))

    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate metadata for a Naughty America scene from the Algolia index,
    then scrape the public scene/pornstar pages for images.

    `metadata.id` is '<sceneID>|...'; only the first segment is used here.
    Mutates and returns `metadata`.
    """
    metadata_id = str(metadata.id).split('|')
    sceneID = metadata_id[0]
    url = PAsearchSites.getSearchSearchURL(siteNum) + '?x-algolia-application-id=I6P9Q9R18E&x-algolia-api-key=08396b1791d619478a55687b4deb48b4'
    detailsPageElements = getAlgolia(url, 'nacms_scenes_production', 'filters=id=' + sceneID)[0]

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    metadata.summary = detailsPageElements['synopsis']

    # Studio
    metadata.studio = 'Naughty America'

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)
    metadata.collections.add(detailsPageElements['site'])

    # Release Date — 'published_at' is a Unix timestamp
    date_object = datetime.fromtimestamp(detailsPageElements['published_at'])
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements['fantasies']:
        genreName = genreLink
        movieGenres.addGenre(genreName)

    # Actors — headshots come from each performer's public profile page,
    # whose slug is derived from the performer's name
    movieActors.clearActors()
    for actorLink in detailsPageElements['performers']:
        actorName = actorLink
        actorPhotoURL = ''
        actorsPageURL = 'https://www.naughtyamerica.com/pornstar/' + actorName.lower().replace(' ', '-').replace("'", '')
        req = PAutils.HTTPRequest(actorsPageURL)
        actorsPageElements = HTML.ElementFromString(req.text)
        img = actorsPageElements.xpath('//img[@class="performer-pic"]/@src')
        if img:
            actorPhotoURL = 'https:' + img[0]
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters — gallery links on the public scene page; normalize the CDN
    # shard ('imagesN') to 'images1'
    art = []
    req = PAutils.HTTPRequest('https://www.naughtyamerica.com/scene/0' + sceneID)
    scenePageElements = HTML.ElementFromString(req.text)
    for photo in scenePageElements.xpath('//div[contains(@class, "contain-scene-images") and contains(@class, "desktop-only")]/a/@href'):
        img = 'https:' + re.sub(r'images\d+', 'images1', photo, 1, flags=re.IGNORECASE)
        art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection.
                # NOTE(review): `width > 1` admits nearly every image as a
                # poster — appears intentional (matches sibling agents); confirm.
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item (landscape orientation)
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                # Best-effort: skip images that fail to download or parse
                pass

    return metadata
def search(results, lang, siteNum, searchData):
    """Search Mylf-network sites: build a direct slug URL from the query title,
    add Google-discovered '/movies/' URLs, then read embedded JSON from each
    candidate page.  Appends MetadataSearchResult entries to `results`.
    """
    directURL = searchData.title.replace(' ', '-').lower()
    if '/' not in directURL:
        # Interpret a leading token as a shoot ID: 'id-rest-of-title' -> 'id/rest-of-title'
        directURL = directURL.replace('-', '/', 1)
    shootID = directURL.split('/', 2)[0]
    if not unicode(shootID, 'UTF-8').isdigit():
        # Not an ID — restore the plain hyphenated slug
        shootID = None
        directURL = directURL.replace('/', '-', 1)
    else:
        directURL = directURL.split('/')[1]
    directURL = PAsearchSites.getSearchSearchURL(siteNum) + directURL
    searchResultsURLs = [directURL]

    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for sceneURL in googleResults:
        sceneURL = sceneURL.rsplit('?', 1)[0]
        if sceneURL not in searchResultsURLs:
            if ('/movies/' in sceneURL):
                searchResultsURLs.append(sceneURL)

    for sceneURL in searchResultsURLs:
        detailsPageElements = getJSONfromPage(sceneURL)
        if detailsPageElements:
            # Page JSON nests the payload under either moviesContent or videosContent
            contentName = None
            for name in ['moviesContent', 'videosContent']:
                if name in detailsPageElements and detailsPageElements[name]:
                    contentName = name
                    break

            if contentName:
                detailsPageElements = detailsPageElements[contentName]
                # Python 2 dict: .keys()[0] grabs the single content key (the scene ID)
                curID = detailsPageElements.keys()[0]
                detailsPageElements = detailsPageElements[curID]
                titleNoFormatting = detailsPageElements['title']

                if 'site' in detailsPageElements:
                    subSite = detailsPageElements['site']['name']
                else:
                    subSite = PAsearchSites.getSearchSiteName(siteNum)

                if 'publishedDate' in detailsPageElements:
                    releaseDate = parse(detailsPageElements['publishedDate']).strftime('%Y-%m-%d')
                else:
                    releaseDate = searchData.dateFormat() if searchData.date else ''
                displayDate = releaseDate if 'publishedDate' in detailsPageElements else ''

                # Prefer date matching when both sides have a date
                if searchData.date and displayDate:
                    score = 100 - Util.LevenshteinDistance(searchData.date, releaseDate)
                else:
                    score = 100 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

                results.Append(MetadataSearchResult(id='%s|%d|%s|%s' % (curID, siteNum, releaseDate, contentName), name='%s [Mylf/%s] %s' % (titleNoFormatting, subSite, displayDate), score=score, lang=lang))

    return results
def update(metadata, siteID, movieGenres, movieActors):
    """Populate metadata for a New Sensations scene or DVD.

    The URL is recovered from `metadata.id` (first '|' segment; '+' and '?'
    were escaped to '/' and '!').  A URL containing 'dvds' takes the DVD
    scraping path; anything else takes the scene path.  Mutates and returns
    `metadata`.
    """
    Log('******UPDATE CALLED*******')
    url = str(metadata.id).split("|")[0].replace('+', '/').replace('?', '!')
    if "dvds" in url:
        sceneType = "DVD"
        Log("Is DVD")
    else:
        sceneType = "Scene"
        Log("Is Scene")
    detailsPageElements = HTML.ElementFromURL(url)
    art = []
    metadata.collections.clear()
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Studio
    metadata.studio = 'New Sensations'

    if sceneType == "Scene":
        Log("SceneUpdate")
        # Title
        metadata.title = detailsPageElements.xpath('//div[@class="trailerVideos clear"]/div[1]')[0].text_content().strip()

        # Summary
        metadata.summary = detailsPageElements.xpath('//div[@class="trailerInfo"]/p')[0].text_content().strip()

        # Tagline and Collection(s)
        # DVD Name
        dvdName = detailsPageElements.xpath('//div[@class="trailerInfo"]/ul/li[4]')[0].text_content().strip()
        metadata.tagline = dvdName
        metadata.collections.add(dvdName)
        # Site Name
        siteName = PAsearchSites.getSearchSiteName(siteID).strip()
        metadata.collections.add(siteName)

        # Genres
        genres = detailsPageElements.xpath('//div[@class="trailerInfo"]/ul/li[3]/a')
        if len(genres) > 0:
            for genreLink in genres:
                genreName = genreLink.text_content().strip().lower()
                movieGenres.addGenre(genreName)

        # Release Date — taken from the third '|' segment of metadata.id, if present
        try:
            date = str(metadata.id).split("|")[2]
            if len(date) > 0:
                date_object = parse(date)
                metadata.originally_available_at = date_object
                metadata.year = metadata.originally_available_at.year
                Log("Date from file")
        except:
            pass

        # Actors — cast size also implies a group-scene genre
        actors = detailsPageElements.xpath('//div[@class="trailerInfo"]/ul/li[1]/span/a')
        if len(actors) > 0:
            if len(actors) == 3:
                movieGenres.addGenre("Threesome")
            if len(actors) == 4:
                movieGenres.addGenre("Foursome")
            if len(actors) > 4:
                movieGenres.addGenre("Orgy")
            for actorLink in actors:
                actorName = str(actorLink.text_content().strip())
                try:
                    actorPageURL = actorLink.get("href")
                    actorPage = HTML.ElementFromURL(actorPageURL)
                    # 'src0_3x' holds the high-resolution headshot variant
                    actorPhotoURL = actorPage.xpath('//div[@class="modelPicture"]/div/img')[0].get("src0_3x")
                except:
                    actorPhotoURL = ''
                movieActors.addActor(actorName, actorPhotoURL)

        ### Posters and artwork ###
        # Video trailer background image
        j = 1
        try:
            twitterBG = detailsPageElements.xpath('//div[@class="trailerArea"]/a[1]')[0].get('href')
            twitterBG = PAsearchSites.getSearchSearchURL(siteID) + twitterBG
            metadata.art[twitterBG] = Proxy.Preview(HTTP.Request(twitterBG, headers={'Referer': 'http://www.google.com'}).content, sort_order=j)
            j += 1
        except:
            pass

        # DVD Page — follow the DVD link for a cover to use as the poster
        posterNum = 1
        try:
            dvdPageLink = detailsPageElements.xpath('//div[@class="trailerInfo"]/ul/li[4]/a')[0].get('href')
            dvdPageElements = HTML.ElementFromURL(dvdPageLink)
            dvdPosterURL = dvdPageElements.xpath('//div[@class="dvdcover"]//img')[0].get("src")
            # Lazy-loaded images keep the real URL in data-src
            if dvdPosterURL == None:
                dvdPosterURL = dvdPageElements.xpath('//div[@class="dvdcover"]//img')[0].get("data-src")
            metadata.posters[dvdPosterURL] = Proxy.Preview(HTTP.Request(dvdPosterURL, headers={'Referer': 'http://www.google.com'}).content, sort_order=posterNum)
            posterNum += 1
        except:
            Log("DVD Cover not found")
            pass
    else:
        Log("DVDUpdate")
        # Title
        title = detailsPageElements.xpath('//div[@class="dvdSections clear"]/div[1]')[0].text_content().replace("DVDS /", "").strip()
        metadata.title = title

        # Summary
        metadata.summary = detailsPageElements.xpath('//div[@class="dDetails"]/p')[0].text_content().strip()

        # Tagline and Collection(s)
        # DVD Name
        dvdName = title
        metadata.tagline = dvdName
        metadata.collections.add(dvdName)
        # Site Name
        siteName = PAsearchSites.getSearchSiteName(siteID).strip()
        metadata.collections.add(siteName)

        # Genres
        genres = detailsPageElements.xpath('//div[@class="dvdDetails clear"]/ul/li[2]/a')
        if len(genres) > 0:
            for genreLink in genres:
                genreName = genreLink.text_content().strip().lower()
                movieGenres.addGenre(genreName)

        # Release Date — ISO format first, then US short format as fallback
        date = detailsPageElements.xpath('//div[@class="dvdDetails clear"]/ul/li[1]')[0].text_content().replace('Released:', '').strip()
        if len(date) > 0:
            try:
                date_object = datetime.strptime(date, '%Y-%m-%d')
            except:
                date_object = datetime.strptime(date, '%m/%d/%y')
            metadata.originally_available_at = date_object
            metadata.year = metadata.originally_available_at.year

        # Actors — linked model pages first; plain 'Featuring:' text as fallback
        try:
            actors = detailsPageElements.xpath('//span[@class="tour_update_models"]/a')
            if len(actors) > 0:
                for actorLink in actors:
                    actorName = str(actorLink.text_content().strip())
                    try:
                        actorPageURL = actorLink.get("href")
                        actorPage = HTML.ElementFromURL(actorPageURL)
                        actorPhotoURL = actorPage.xpath('//div[@class="modelPicture"]/div/img')[0].get("src0_3x")
                    except:
                        actorPhotoURL = ''
                    movieActors.addActor(actorName, actorPhotoURL)
        except:
            actorsList = detailsPageElements.xpath('//div[@class="dvdDetails clear"]/div[2]/p')[0].text_content().split('Featuring:')[1]
            actors = actorsList.split(",")
            if len(actors) > 0:
                for actorLink in actors:
                    actorName = str(actorLink.strip())
                    actorPhotoURL = ''
                    movieActors.addActor(actorName, actorPhotoURL)

        ### Posters and artwork ###
        # DVD Cover
        posterNum = 1
        try:
            dvdPosterURL = detailsPageElements.xpath('//div[@class="dvdcover"]//img')[0].get("src")
            if dvdPosterURL == None:
                dvdPosterURL = detailsPageElements.xpath('//div[@class="dvdcover"]//img')[0].get("data-src")
            metadata.posters[dvdPosterURL] = Proxy.Preview(HTTP.Request(dvdPosterURL, headers={'Referer': 'http://www.google.com'}).content, sort_order=posterNum)
            posterNum += 1
        except:
            Log("DVD Cover not found")
            pass

    return metadata
def search(results, encodedTitle, title, searchTitle, siteNum, lang, searchByDateActor, searchDate, searchSiteID):
    """Query the site's /v2/releases API across all release types and score
    each hit.  Appends MetadataSearchResult entries to `results`.
    """
    # 9999 means 'no override'; otherwise the caller pins a specific sub-site
    if searchSiteID != 9999:
        siteNum = searchSiteID

    # The API requires a per-session instance token, passed as a header
    cookies = get_Cookies(PAsearchSites.getSearchBaseURL(siteNum))
    headers = {
        'Instance': cookies['instance_token'],
    }

    sceneID = None
    splited = searchTitle.split(' ')
    if unicode(splited[0], 'utf8').isdigit():
        sceneID = splited[0]
        searchTitle = searchTitle.replace(sceneID, '', 1).strip()

    for sceneType in ['scene', 'movie', 'serie', 'trailer']:
        # ID-only query hits the exact-id endpoint; otherwise full-text search
        if sceneID and not searchTitle:
            url = PAsearchSites.getSearchSearchURL(siteNum) + '/v2/releases?type=%s&id=%s' % (sceneType, sceneID)
        else:
            url = PAsearchSites.getSearchSearchURL(siteNum) + '/v2/releases?type=%s&search=%s' % (sceneType, encodedTitle)

        data = None
        # NOTE(review): relies on `urllib.Request`/`urllib.urlopen` — presumably
        # urllib is aliased to urllib2 in this plugin's imports; confirm.
        req = urllib.Request(url, headers=headers)
        try:
            data = urllib.urlopen(req).read()
        except Exception as e:
            Log(e)
            pass

        if data:
            searchResults = json.loads(data)
            for searchResult in searchResults['result']:
                titleNoFormatting = searchResult['title']
                releaseDate = parse(searchResult['dateReleased']).strftime('%Y-%m-%d')
                curID = searchResult['id']
                siteName = searchResult['brand'].title()
                subSite = ''
                if 'collections' in searchResult and searchResult['collections']:
                    subSite = searchResult['collections'][0]['name']
                siteDisplay = '%s/%s' % (siteName, subSite) if subSite else siteName

                # Scoring priority: explicit scene ID, then date, then title
                if sceneID:
                    score = 100 - Util.LevenshteinDistance(sceneID, curID)
                elif searchDate:
                    score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
                else:
                    score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())

                # Trailers are tagged and slightly penalized
                if sceneType == 'trailer':
                    titleNoFormatting = '[%s] %s' % (sceneType.capitalize(), titleNoFormatting)
                    score = score - 10

                results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, sceneType), name='%s [%s] %s' % (titleNoFormatting, siteDisplay, releaseDate), score=score, lang=lang))

    return results
def search(results, encodedTitle, title, searchTitle, siteNum, lang, searchByDateActor, searchDate, searchSiteID):
    """Search WowGirls with a fallback chain: exact-phrase search on
    wowgirls.xxx, then exact-phrase on wowgirls.tv, then plain search on both.
    Appends MetadataSearchResult entries to `results`.

    Each fetch falls back to raw urllib with TLSv1 when Plex's
    HTML.ElementFromURL fails (the site's TLS setup trips the default client).
    """
    if searchSiteID != 9999:
        siteNum = searchSiteID
    # %22 wraps the title in quotes for an exact-phrase search
    urlWowXXX = PAsearchSites.getSearchSearchURL(siteNum) + '%22' + encodedTitle + '%22'
    urlWowTV = 'https://www.wowgirls.tv/?s=%22' + encodedTitle + '%22'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
    }
    try:
        searchResultsWowXXX = HTML.ElementFromURL(urlWowXXX)
    except:
        request = urllib.Request(urlWowXXX, headers=headers)
        response = urllib.urlopen(request, context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
        htmlstring = response.read()
        searchResultsWowXXX = fromstring(htmlstring)
    Log('Search-Video-Number: ' + searchResultsWowXXX.xpath('//span[@class="search-video-number"]')[0].text_content().split(' ', 1)[0])
    # The hit counter ('N videos') decides whether the exact search matched
    if int(searchResultsWowXXX.xpath('//span[@class="search-video-number"]')[0].text_content().split(' ', 1)[0]) > 0:
        Log('Title Found on wowgirls.xxx')
        for searchResult in searchResultsWowXXX.xpath('//div[@class="videos-list"]/article'):
            titleNoFormatting = searchResult.xpath('.//a')[0].get('title').strip()
            # Escape '/' and '?' so the URL survives inside the '|'-joined id
            curID = searchResult.xpath('.//a')[0].get('href').replace('/', '_').replace('?', '!')
            score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
            if searchDate:
                releaseDate = parse(searchDate).strftime('%Y-%m-%d')
            else:
                releaseDate = ''
            results.Append(MetadataSearchResult(id=curID + "|" + str(siteNum) + "|" + releaseDate, name=titleNoFormatting + " [WowGirls.xxx] ", score=score, lang=lang))
    else:
        Log('Title not found on wowgirls.xxx, trying wowgirls.tv')
        try:
            searchResultsWowTV = HTML.ElementFromURL(urlWowTV)
        except:
            request = urllib.Request(urlWowTV, headers=headers)
            response = urllib.urlopen(request, context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
            htmlstring = response.read()
            searchResultsWowTV = fromstring(htmlstring)
        # wowgirls.tv renders an <h1> only on its 'no results' page
        if len(searchResultsWowTV.xpath('//h1')) == 0:
            Log('Title found on wowgirls.tv')
            for searchResult in searchResultsWowTV.xpath('//div[@class="entry clearfix latest"]'):
                titleNoFormatting = searchResult.xpath('.//h3/a')[0].text_content().strip()
                curID = searchResult.xpath('.//h3/a')[0].get('href').replace('/', '_').replace('?', '!')
                score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
                if searchDate:
                    releaseDate = parse(searchDate).strftime('%Y-%m-%d')
                else:
                    releaseDate = ''
                results.Append(MetadataSearchResult(id=curID + "|" + str(siteNum) + "|" + releaseDate, name=titleNoFormatting + " [WowGirls.tv] ", score=score, lang=lang))
        else:
            Log('No exact Title found, trying normal Search on wowgirls.xxx and .tv')
            # Retry both sites without the exact-phrase quotes
            urlWowXXX = PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle
            urlWowTV = 'https://www.wowgirls.tv/?s=' + encodedTitle
            try:
                searchResultsWowXXX = HTML.ElementFromURL(urlWowXXX)
            except:
                request = urllib.Request(urlWowXXX, headers=headers)
                response = urllib.urlopen(request, context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
                htmlstring = response.read()
                searchResultsWowXXX = fromstring(htmlstring)
            for searchResult in searchResultsWowXXX.xpath('//div[@class="videos-list"]/article'):
                titleNoFormatting = searchResult.xpath('.//a')[0].get('title').strip()
                curID = searchResult.xpath('.//a')[0].get('href').replace('/', '_').replace('?', '!')
                score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
                if searchDate:
                    releaseDate = parse(searchDate).strftime('%Y-%m-%d')
                else:
                    releaseDate = ''
                results.Append(MetadataSearchResult(id=curID + "|" + str(siteNum) + "|" + releaseDate, name=titleNoFormatting + " [WowGirls.xxx] ", score=score, lang=lang))
            try:
                searchResultsWowTV = HTML.ElementFromURL(urlWowTV)
            except:
                request = urllib.Request(urlWowTV, headers=headers)
                response = urllib.urlopen(request, context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
                htmlstring = response.read()
                searchResultsWowTV = fromstring(htmlstring)
            for searchResult in searchResultsWowTV.xpath('//div[@class="entry clearfix latest"]'):
                titleNoFormatting = searchResult.xpath('.//h3/a')[0].text_content().strip()
                curID = searchResult.xpath('.//h3/a')[0].get('href').replace('/', '_').replace('?', '!')
                score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
                if searchDate:
                    releaseDate = parse(searchDate).strftime('%Y-%m-%d')
                else:
                    releaseDate = ''
                results.Append(MetadataSearchResult(id=curID + "|" + str(siteNum) + "|" + releaseDate, name=titleNoFormatting + " [WowGirls.tv] ", score=score, lang=lang))

    return results
def update(metadata, siteID, movieGenres, movieActors):
    """Populate metadata for a Gamma/Algolia-backed site release.

    `metadata.id` format: '<sceneID>|<siteNum>|<sceneType>|<date>' where
    sceneType is 'scenes' or 'movies'.  Mutates and returns `metadata`.

    Fix: the actor-lookup filter now wraps `actor_id` in str(); the Algolia
    payload can return it as a number, and the original bare string
    concatenation raised TypeError in that case (str() is a no-op when the
    value is already a string).
    """
    Log('******UPDATE CALLED*******')
    metadata_id = str(metadata.id).split('|')
    sceneID = int(metadata_id[0])
    sceneType = metadata_id[2]
    # Scenes and movies are keyed by different ID fields in the index
    sceneIDName = 'clip_id' if sceneType == 'scenes' else 'movie_id'
    sceneDate = metadata_id[3]

    apiKEY = getAPIKey(PAsearchSites.getSearchBaseURL(siteID))
    url = PAsearchSites.getSearchSearchURL(siteID) + '?x-algolia-application-id=TSMKFA364Q&x-algolia-api-key=' + apiKEY
    data = getAlgolia(url, 'all_' + sceneType, 'filters=%s=%d' % (sceneIDName, sceneID), PAsearchSites.getSearchBaseURL(siteID))
    detailsPageElements = data['results'][0]['hits'][0]

    # Studio
    metadata.studio = detailsPageElements['studio_name']

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    metadata.summary = detailsPageElements['description'].replace('</br>', '\n')

    # Release Date — taken from the ID, not the API payload
    date_object = parse(sceneDate)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Tagline and Collection(s)
    metadata.collections.clear()
    for collectionName in ['studio_name', 'serie_name']:
        if collectionName in detailsPageElements:
            metadata.collections.add(detailsPageElements[collectionName])
    # Titles like 'Series: Part #2' also get their parent movie as a collection
    if ':' in detailsPageElements['title'] or '#' in detailsPageElements['title']:
        if 'movie_title' in detailsPageElements:
            metadata.collections.add(detailsPageElements['movie_title'])

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements['categories']:
        genreName = genreLink['name']
        if genreName:
            movieGenres.addGenre(genreName)

    # Actors — each actor's photo comes from a second Algolia lookup
    movieActors.clearActors()
    for actorLink in detailsPageElements['actors']:
        actorName = actorLink['name']
        data = getAlgolia(url, 'all_actors', 'filters=actor_id=' + str(actorLink['actor_id']), PAsearchSites.getSearchBaseURL(siteID))
        actorData = data['results'][0]['hits'][0]
        if actorData['pictures']:
            # Keys sort so the lexicographically-last entry is the largest size
            max_quality = sorted(actorData['pictures'].keys())[-1]
            actorPhotoURL = 'https://images-fame.gammacdn.com/actors' + actorData['pictures'][max_quality]
        else:
            actorPhotoURL = ''
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    # These two sites have no DVD front covers on the CDN
    if not PAsearchSites.getSearchBaseURL(siteID).endswith(('girlsway.com', 'puretaboo.com')):
        art.append(
            'https://images-fame.gammacdn.com/movies/{0}/{0}_{1}_front_400x625.jpg'
            .format(detailsPageElements['movie_id'],
                    detailsPageElements['url_title'].lower().replace('-', '_')))
    if 'pictures' in detailsPageElements:
        # Only numeric-resolution keys (e.g. '1920x1080'), not named variants
        keys = [
            key for key in detailsPageElements['pictures'].keys()
            if key[0].isdigit()
        ]
        max_quality = sorted(keys)[-1]
        art.append('https://images-fame.gammacdn.com/movies/' + detailsPageElements['pictures'][max_quality])

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                img_file = urllib.urlopen(posterUrl)
                im = StringIO(img_file.read())
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection.
                # NOTE(review): `width > 1` admits nearly every image as a
                # poster — appears intentional (matches sibling agents); confirm.
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(HTTP.Request(posterUrl, headers={'Referer': 'http://www.google.com'}).content, sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item (landscape orientation)
                    metadata.art[posterUrl] = Proxy.Media(HTTP.Request(posterUrl, headers={'Referer': 'http://www.google.com'}).content, sort_order=idx)
            except:
                # Best-effort: skip images that fail to download or parse
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate Plex metadata for a scene from a JSON REST API (/scenes/<id>).

    metadata.id is 'sceneID|...'; only the first segment is used. Fills title,
    summary, studio/tagline/collections, date, genres, actors and artwork on
    the passed-in `metadata`. Returns `metadata`.
    """
    metadata_id = str(metadata.id).split('|')
    sceneID = metadata_id[0]
    url = PAsearchSites.getSearchSearchURL(siteNum) + '/scenes/' + sceneID
    req = getDataFromAPI(url)
    detailsPageElements = req['data']

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    metadata.summary = detailsPageElements['description']

    # Studio, Tagline and Collection(s)
    metadata.collections.clear()
    if 'site' in detailsPageElements and detailsPageElements['site']:
        studio_name = detailsPageElements['site']['name']
        collections = [studio_name]
        site_id = detailsPageElements['site']['id']
        network_id = detailsPageElements['site']['network_id']
        # When the site belongs to a different parent network, fetch the
        # network's name and use it as the studio/tagline instead.
        if network_id and site_id != network_id:
            url = PAsearchSites.getSearchSearchURL(siteNum) + '/sites/%d' % network_id
            req = getDataFromAPI(url)
            if req and 'data' in req and req['data']:
                studio_name = req['data']['name']
                collections.append(studio_name)
        metadata.tagline = studio_name
        metadata.studio = studio_name
        for collection in collections:
            metadata.collections.add(collection)

    # Release Date (skipped entirely if the API has no date)
    date = detailsPageElements['date']
    if date:
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    if 'tags' in detailsPageElements:
        for genreLink in detailsPageElements['tags']:
            genreName = genreLink['name']
            movieGenres.addGenre(genreName)

    # Actors — prefer the parent performer's name (alias handling)
    movieActors.clearActors()
    for actorLink in detailsPageElements['performers']:
        actorName = actorLink['name']
        actorPhotoURL = actorLink['image']
        if 'parent' in actorLink and actorLink['parent'] and 'name' in actorLink['parent']:
            actorName = actorLink['parent']['name']
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: poster + background, both at 'large' size
    art = [
        detailsPageElements['posters']['large'],
        detailsPageElements['background']['large'],
    ]
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis; failures skip the image
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item (landscape)
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass
    return metadata
def search(results, encodedTitle, title, searchTitle, siteNum, lang, searchByDateActor, searchDate, searchSiteID):
    """Search scenes and DVDs across many Gamma-network sites by scraping HTML.

    Per-site flags/URL fragments are selected from siteNum, then up to three
    passes run: scene listing page(s) ('tlcDetails' cards), a direct-match
    details page (Evil Angel only), and a DVD listing. Each hit is appended to
    `results` as a MetadataSearchResult whose id encodes the scene href
    ('/'->'_', '?'->'!') plus the site number. Returns `results`.
    """
    # Pass toggles and URL separators, overridden per network below
    networkscene = True
    networkscenepages = True
    networkdvd = True
    directmatch = False
    network_sep_scene_prev = ""
    network_sep_scene = ""
    network_sep_scene_pages_prev = ""
    network_sep_scene_pages = "/"
    network_sep_scene_pages_next = ""
    network_sep_dvd_prev = ""
    network_sep_dvd = "/1/dvd"
    if searchSiteID != 9999:
        siteNum = searchSiteID
    # Map siteNum ranges to network name + URL-shape quirks
    if siteNum == 278 or (siteNum >= 285 and siteNum <= 287):
        network = 'XEmpire'
        network_sep_scene_prev = "scene/"
        network_sep_scene_pages_prev = "scene/"
        network_sep_dvd_prev = "dvd/"
        network_sep_dvd = "/1"
    elif siteNum == 329 or (siteNum >= 351 and siteNum <= 354):
        network = 'Blowpass'
        networkdvd = False
    elif siteNum == 331 or (siteNum >= 355 and siteNum <= 360) or siteNum == 750:
        network = 'Fantasy Massage'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    elif (siteNum >= 365 and siteNum <= 372) or siteNum == 466 or siteNum == 692:
        network = '21Sextury'
        networkdvd = False
    elif siteNum == 183 or (siteNum >= 373 and siteNum <= 374):
        network = '21Naturals'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    elif siteNum == 53 or (siteNum >= 375 and siteNum <= 379):
        network = 'Girlsway'
        networkdvd = False
    elif siteNum >= 383 and siteNum <= 386:
        network = 'Fame Digital'
        if siteNum == 383:
            networkdvd = False
            network_sep_scene = "/scene"
            network_sep_scene_pages = "/scene/"
            network_sep_dvd = "/dvd"
        if siteNum == 386:
            # Site 386 supports none of the scraped passes
            networkscene = False
            networkscenepages = False
            networkdvd = False
    elif siteNum >= 387 and siteNum <= 392:
        network = 'Open Life Network'
        networkdvd = False
    elif siteNum == 281:
        network = 'Pure Taboo'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    elif siteNum == 380:
        network = 'Girlfriends Films'
        network_sep_scene = "?query=&pscenes=0&tab=scenes"
        network_sep_scene_pages = "?query=&pscenes="
        network_sep_scene_pages_next = "&tab=scenes"
        network_sep_dvd = "&tab=movies"
    elif siteNum == 381:
        network = 'Burning Angel'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    elif siteNum == 277:
        # Evil Angel uses only the direct-match pass
        network = 'Evil Angel'
        networkscene = False
        networkscenepages = False
        networkdvd = False
        directmatch = True
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
        network_sep_dvd = "/dvd"
    elif siteNum == 382:
        network = 'Pretty Dirty'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    elif siteNum >= 460 and siteNum <= 465:
        network = '21Sextreme'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    # Omit the network prefix when the site IS the network
    if network == PAsearchSites.getSearchSiteName(siteNum):
        network = ''
    else:
        network = network + "/"

    if networkscene:
        # Result to check (first page's ids, used to detect page-2 repeats)
        resultfirst = []
        # Result next page
        resultsecond = []
        #searchResults = HTML.ElementFromURL(PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + "?query=" + encodedTitle)
        encodedTitle = encodedTitle.replace("%27", "").replace("%3F", "").replace("%2C", "")  # Remove troublesome punctuation (' ? ,)
        searchResults = HTML.ElementFromURL(PAsearchSites.getSearchSearchURL(siteNum) + network_sep_scene_prev + encodedTitle + network_sep_scene)
        for searchResult in searchResults.xpath('//div[@class="tlcDetails"]'):
            titleNoFormatting = searchResult.xpath('.//a[1]')[0].text_content().strip()
            titleNoFormatting = titleNoFormatting.replace("BONUS-", "BONUS - ")
            titleNoFormatting = titleNoFormatting.replace("BTS-", "BTS - ")
            curID = searchResult.xpath('.//a[1]')[0].get('href').replace('/', '_').replace('?', '!')
            resultfirst.append(curID)
            # Build " - actor, actor" suffix; bonus/BTS clips list all actors
            # except the site owners, regular scenes just the first actor.
            try:
                actorLink = searchResult.xpath('.//div[@class="tlcActors"]/a')
                actor = ' - '
                if "BONUS" in titleNoFormatting or "BTS" in titleNoFormatting:
                    for actorText in actorLink:
                        actorName = str(actorText.text_content().strip())
                        if "Rocco Siffredi" not in actorName and "Peter North" not in actorName:
                            actor = actor + actorName + ", "
                else:
                    actor = actor + str(actorLink[0].text_content().strip())
                actor = actor.strip()
                actor = actor.strip(",")
                actor = " " + actor
            except:
                actor = ''
            # Date: listing card first, then the scene's own page as fallback
            try:
                releaseDate = parse(searchResult.xpath('.//div[@class="tlcSpecs"]/span[@class="tlcSpecsDate"]/span[@class="tlcDetailsValue"]')[0].text_content().strip()).strftime('%Y-%m-%d')
            except:
                try:
                    detailsPageElements = HTML.ElementFromURL(PAsearchSites.getSearchBaseURL(siteNum) + searchResult.xpath('.//a[1]')[0].get('href'))
                    releaseDate = parse(detailsPageElements.xpath('//*[@class="updatedDate"]')[0].text_content().strip()).strftime('%Y-%m-%d')
                except:
                    releaseDate = ''
            # Score by date when available, otherwise by title distance
            if searchDate and releaseDate:
                score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
            results.Append(MetadataSearchResult(id=curID + "|" + str(siteNum), name=titleNoFormatting + actor + " [" + network + PAsearchSites.getSearchSiteName(siteNum) + "] " + releaseDate, score=score, lang=lang))

        if networkscenepages:
            # Other pages: walk pages until one repeats the previous page's
            # first result (i is forced to 100 to break the while loop).
            i = 2
            while i < 3:
                pagenum = i
                if siteNum == 380:
                    # Girlfriends Films pages are 0-based
                    pagenum = i - 1
                searchResultsSec = HTML.ElementFromURL(PAsearchSites.getSearchSearchURL(siteNum) + network_sep_scene_pages_prev + encodedTitle + network_sep_scene_pages + str(pagenum) + network_sep_scene_pages_next)
                i += 1
                searchResultSec = searchResultsSec.xpath('//div[@class="tlcDetails"]')
                if searchResultSec:
                    titleText = searchResultSec[0].xpath('.//a[1]')[0]
                    resultSEARCH = titleText.get('href').replace('/', '_').replace('?', '!')
                    for resultCheck in resultfirst:
                        if resultCheck == resultSEARCH:
                            i = 100
                            break
                    for searchResultSec in searchResultsSec.xpath('//div[@class="tlcDetails"]'):
                        titleText = searchResultSec.xpath('.//a[1]')[0]
                        titleNoFormatting = titleText.text_content().strip()
                        titleNoFormatting = titleNoFormatting.replace("BONUS-", "BONUS - ")
                        titleNoFormatting = titleNoFormatting.replace("BTS-", "BTS - ")
                        curID = titleText.get('href').replace('/', '_').replace('?', '!')
                        resultsecond.append(curID)
                        # Same actor-suffix logic as page one
                        try:
                            actorLink = searchResultSec.xpath('.//div[@class="tlcActors"]/a')
                            actor = ' - '
                            if "BONUS" in titleNoFormatting or "BTS" in titleNoFormatting:
                                for actorText in actorLink:
                                    actorName = str(actorText.text_content().strip())
                                    if "Rocco Siffredi" not in actorName and "Peter North" not in actorName:
                                        actor = actor + actorName + ", "
                            else:
                                actor = actor + str(actorLink[0].text_content().strip())
                            actor = actor.strip()
                            actor = actor.strip(",")
                            actor = " " + actor
                        except:
                            actor = ''
                        try:
                            releaseDate = parse(searchResultSec.xpath('.//div[@class="tlcSpecs"]/span[@class="tlcSpecsDate"]/span[@class="tlcDetailsValue"]')[0].text_content().strip()).strftime('%Y-%m-%d')
                        except:
                            try:
                                detailsPageElements = HTML.ElementFromURL(PAsearchSites.getSearchBaseURL(siteNum) + searchResultSec.xpath('.//a[1]')[0].get('href'))
                                releaseDate = parse(detailsPageElements.xpath('//*[@class="updatedDate"]')[0].text_content().strip()).strftime('%Y-%m-%d')
                            except:
                                releaseDate = ''
                        if searchDate and releaseDate:
                            score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
                        else:
                            score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
                        results.Append(MetadataSearchResult(id=curID + "|" + str(siteNum), name=titleNoFormatting + actor + " [" + network + PAsearchSites.getSearchSiteName(siteNum) + "] " + releaseDate, score=score, lang=lang))
                    resultfirst = resultsecond
                    resultsecond = []
                else:
                    # No more results: stop paging
                    i = 100

    if directmatch:
        # Result to check
        resultfirst = []
        # Build a slug URL directly from the title (Evil Angel style)
        searchString = encodedTitle.replace("%20", '-').lower()
        #searchResults = HTML.ElementFromURL(PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + "?query=" + encodedTitle)
        searchResults = HTML.ElementFromURL(PAsearchSites.getSearchSearchURL(siteNum) + searchString)
        for searchResult in searchResults.xpath('//div[@id="infoWrapper"]'):
            titleNoFormatting = searchResult.xpath('.//h1[1]')[0].text_content().strip()
            titleNoFormatting = titleNoFormatting.replace("BONUS-", "BONUS - ")
            titleNoFormatting = titleNoFormatting.replace("BTS-", "BTS - ")
            curID = (PAsearchSites.getSearchSearchURL(siteNum) + searchString).replace('/', '_').replace('?', '!')
            resultfirst.append(curID)
            Log(curID + titleNoFormatting + "FOUND")
            # (An actor-suffix extraction block is commented out here in the
            # original; kept disabled.)
            try:
                # NOTE(review): text_content().strip() returns a string, which
                # has no .strftime — this always raises and releaseDate ends up
                # ''. A parse(...) call appears to be missing; confirm intent.
                releaseDate = (searchResult.xpath('//li[@class="updatedDate"]')[0].text_content().strip()).strftime('%Y-%m-%d')
            except:
                releaseDate = ''
            Log(releaseDate + "FOUND")
            if searchDate and releaseDate:
                score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
            results.Append(MetadataSearchResult(id=curID + "|" + str(siteNum), name=titleNoFormatting + " [" + network + PAsearchSites.getSearchSiteName(siteNum) + "] " + releaseDate, score=score, lang=lang))

    if networkdvd:
        # DVD pass; the whole pass is best-effort (outer try swallows errors)
        try:
            dvdResults = HTML.ElementFromURL(PAsearchSites.getSearchSearchURL(siteNum) + network_sep_dvd_prev + encodedTitle + network_sep_dvd)
            for dvdResult in dvdResults.xpath('//div[contains(@class,"tlcItem playlistable_dvds")] | //div[@class="tlcDetails"]'):
                titleNoFormatting = dvdResult.xpath('.//div[@class="tlcTitle"]/a')[0].get('title').strip()
                curID = dvdResult.xpath('.//a')[0].get('href').replace('/', '_').replace('?', '!')
                # releaseDate stays a datetime here (strftime applied below)
                try:
                    releaseDate = parse(dvdResult.xpath('.//div[@class="tlcSpecs"]/span[@class="tlcSpecsDate"]/span[@class="tlcDetailsValue"]')[0].text_content().strip())
                except:
                    try:
                        detailsPageElements = HTML.ElementFromURL(PAsearchSites.getSearchBaseURL(siteNum) + dvdResult.xpath('.//a[1]')[0].get('href'))
                        releaseDate = parse(detailsPageElements.xpath('//*[@class="updatedDate"]')[0].text_content().strip())
                    except:
                        # NOTE(review): '' has no .strftime — a dateless DVD
                        # raises below and aborts the pass via the outer except.
                        releaseDate = ''
                score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
                results.Append(MetadataSearchResult(id=curID + "|" + str(siteNum), name=titleNoFormatting + " (" + releaseDate.strftime('%Y') + ") - Full Movie [" + PAsearchSites.getSearchSiteName(siteNum) + "]", score=score, lang=lang))
        except:
            pass
    return results
def search(results, lang, siteNum, search):
    """Search a data18-style site, then re-check Google-discovered scene pages.

    A leading number (> 100) in search['title'] is treated as a scene id and
    stripped; its /content/<id> URL is queued for direct inspection. Hits from
    the on-site search, plus unseen /content/ URLs from Google, are scored and
    appended to `results`. Multiple score-80 ties are demoted to 79 at the end.
    Returns `results`.
    """
    searchResults = []   # direct-URL candidates to fetch individually
    siteResults = []     # URLs already covered by the on-site search
    temp = []            # score-80 results, held back for tie demotion
    count = 0            # number of score-80 results
    sceneID = None
    splited = search['title'].split()
    # Python 2: unicode(...).isdigit() checks the leading token is numeric
    if unicode(splited[0], 'UTF-8').isdigit():
        sceneID = splited[0]
        if int(sceneID) > 100:
            search['title'] = search['title'].replace(sceneID, '', 1).strip()
            sceneURL = '%s/content/%s' % (PAsearchSites.getSearchBaseURL(siteNum), sceneID)
            searchResults.append(sceneURL)
    search['encoded'] = search['title'].replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum), search['encoded'])
    req = PAutils.HTTPRequest(searchURL, headers={'Referer': 'http://www.data18.com'})
    searchPageElements = HTML.ElementFromString(req.text)
    for searchResult in searchPageElements.xpath('//p[@class="genmed"]//parent::div'):
        sceneURL = searchResult.xpath('.//*[contains(@href, "content")]/@href')[0]
        if sceneURL not in searchResults:
            # Trailing path segment is the numeric scene id
            urlID = re.sub(r'.*/', '', sceneURL)
            # Site label: Network > Studio > nothing
            try:
                siteName = searchResult.xpath('.//*[contains(., "Network")]')[0].text_content().replace('Network:', '').strip()
            except:
                try:
                    siteName = searchResult.xpath('.//*[contains(., "Studio")]')[0].text_content().replace('Studio:', '').strip()
                except:
                    siteName = ''
            try:
                subSite = searchResult.xpath('.//p[@class][contains(., "Site:")]')[0].text_content().replace('Site:', '').strip()
            except:
                subSite = ''
            if siteName:
                siteDisplay = '%s/%s' % (siteName, subSite) if subSite else siteName
            else:
                siteDisplay = subSite
            titleNoFormatting = PAutils.parseTitle(searchResult.xpath('.//*[contains(@href, "content")]')[1].text_content(), siteNum)
            curID = PAutils.Encode(sceneURL)
            siteResults.append(sceneURL)
            try:
                date = searchResult.xpath('.//p[@class="genmed"]')[0].text_content().strip()
                # Strip a leading '#id ' marker from the date text
                date = re.sub(r'^#(.*?)\s', '', date)
            except:
                date = ''
            if date and not date == 'unknown':
                # 'Sept' is not a parseable month abbreviation
                date = date.replace('Sept', 'Sep')
                releaseDate = parse(date).strftime('%Y-%m-%d')
            else:
                # Fall back to the filename date, if the caller supplied one
                releaseDate = parse(search['date']).strftime('%Y-%m-%d') if search['date'] else ''
            displayDate = releaseDate if date else ''
            # Exact id match wins outright; otherwise score from 80 down
            if sceneID == urlID:
                score = 100
            elif search['date'] and displayDate:
                score = 80 - Util.LevenshteinDistance(search['date'], releaseDate)
            else:
                score = 80 - Util.LevenshteinDistance(search['title'].lower(), titleNoFormatting.lower())
            if score == 80:
                count += 1
                temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, siteDisplay, displayDate), score=score, lang=lang))
            else:
                results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, siteDisplay, displayDate), score=score, lang=lang))

    # Add Google hits the on-site search didn't already cover
    googleResults = PAutils.getFromGoogleSearch(search['title'], siteNum)
    for sceneURL in googleResults:
        if ('/content/' in sceneURL and '.html' not in sceneURL and sceneURL not in searchResults and sceneURL not in siteResults):
            searchResults.append(sceneURL)

    # Fetch and score every direct-URL candidate
    for sceneURL in searchResults:
        req = PAutils.HTTPRequest(sceneURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', sceneURL)
        try:
            siteName = detailsPageElements.xpath('//i[contains(., "Network")]//preceding-sibling::a[1]')[0].text_content().strip()
        except:
            try:
                siteName = detailsPageElements.xpath('//i[contains(., "Studio")]//preceding-sibling::a[1]')[0].text_content().strip()
            except:
                siteName = ''
        try:
            subSite = detailsPageElements.xpath('//i[contains(., "Site")]//preceding-sibling::a[1]')[0].text_content().strip()
        except:
            subSite = ''
        if siteName:
            siteDisplay = '%s/%s' % (siteName, subSite) if subSite else siteName
        else:
            siteDisplay = subSite
        titleNoFormatting = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content(), siteNum)
        curID = PAutils.Encode(sceneURL)
        try:
            date = detailsPageElements.xpath('//span[@class][./*[contains(.., "date")]]')[0].text_content().split(':', 2)[-1].strip()
        except:
            date = ''
        if date and not date == 'unknown':
            releaseDate = parse(date).strftime('%Y-%m-%d')
        else:
            releaseDate = parse(search['date']).strftime('%Y-%m-%d') if search['date'] else ''
        displayDate = releaseDate if date else ''
        if sceneID == urlID:
            score = 100
        elif search['date'] and displayDate:
            score = 80 - Util.LevenshteinDistance(search['date'], releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(search['title'].lower(), titleNoFormatting.lower())
        if score == 80:
            count += 1
            temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, siteDisplay, displayDate), score=score, lang=lang))
        else:
            results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, siteDisplay, displayDate), score=score, lang=lang))

    # Flush held-back results: demote ambiguous 80s to 79 when tied
    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=79, lang=lang))
        else:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=result.score, lang=lang))
    return results
def update(metadata, siteID, movieGenres, movieActors):
    """Populate Plex metadata for Girlfriends Films via two paths.

    If metadata.id has a third segment (sceneType) it came from the Algolia
    search: fetch the scene/movie record, its sibling scenes (for 'Scene N'
    numbering), actor headshots and CDN artwork. Otherwise the id is an
    encoded DVD-page URL ('_'->'/', '!'->'?') scraped directly from the site.
    Both paths feed the shared artwork loop at the bottom. Returns `metadata`.
    """
    Log('******UPDATE CALLED*******')
    metadata_id = str(metadata.id).split('|')
    sceneType = metadata_id[2] if len(metadata_id) > 2 else None
    if sceneType:
        # --- Algolia path (scene or movie record) ---
        sceneID = int(metadata_id[0])
        sceneIDName = 'clip_id' if sceneType == 'scenes' else 'movie_id'
        sceneDate = metadata_id[3]
        apiKEY = getAPIKey(PAsearchSites.getSearchBaseURL(siteID))
        urlParams = '?x-algolia-application-id=TSMKFA364Q&x-algolia-api-key=' + apiKEY
        url = PAsearchSites.getSearchSearchURL(siteID).replace('*', 'girlfriendsfilms_' + sceneType, 1) + urlParams
        data = getAlgolia(url, 'filters=%s=%d' % (sceneIDName, sceneID), PAsearchSites.getSearchBaseURL(siteID))
        detailsPageElements = data['hits'][0]
        # Second query: all scenes sharing this url_title, ordered by clip_id,
        # so the scene's position in the parent movie can be determined.
        url = PAsearchSites.getSearchSearchURL(siteID).replace('*', 'girlfriendsfilms_scenes', 1) + urlParams
        data = getAlgolia(url, 'query=%s' % detailsPageElements['url_title'], PAsearchSites.getSearchBaseURL(siteID))['hits']
        data = sorted(data, key=lambda i: i['clip_id'])
        # NOTE(review): enumerate() yields a single-use iterator, but
        # scenesPagesElements is iterated again in the Genres/Posters sections
        # below — those later loops see nothing once it is consumed here.
        # Verify whether list(enumerate(...)) was intended.
        scenesPagesElements = enumerate(data, 1)

        # Studio
        metadata.studio = detailsPageElements['studio_name']

        # Title — append ', Scene N' for scenes that are part of a movie
        if sceneType == 'scenes':
            for idx, scene in scenesPagesElements:
                if scene['clip_id'] == sceneID:
                    metadata.title = '%s, Scene %d' % (detailsPageElements['title'], idx)
        if not metadata.title:
            metadata.title = detailsPageElements['title']

        # Summary — skip re-release boilerplate
        description = detailsPageElements['description']
        if not description.startswith('Previously released on'):
            metadata.summary = description

        # Release Date — from the id payload
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

        # Tagline and Collection(s)
        metadata.collections.clear()
        for collectionName in ['network_name', 'serie_name', 'movie_title']:
            if collectionName in detailsPageElements:
                metadata.collections.add(detailsPageElements[collectionName])

        # Genres
        movieGenres.clearGenres()
        genres = detailsPageElements['categories']
        for genreLink in genres:
            genreName = genreLink['name']
            movieGenres.addGenre(genreName)
        if sceneType == 'movies':
            # Also merge in the categories of every contained scene
            for idx, scene in scenesPagesElements:
                for genreLink in scene['categories']:
                    movieGenres.addGenre(genreLink['name'])

        # Actors — extra Algolia lookup per actor for a headshot
        movieActors.clearActors()
        actors = detailsPageElements['actors']
        for actorLink in actors:
            actorName = actorLink['name']
            url = PAsearchSites.getSearchSearchURL(siteID).replace('*', 'girlfriendsfilms_actors', 1) + urlParams
            # NOTE(review): if actor_id is numeric (the scene filter above uses
            # %d on an int id), this str+int concatenation raises TypeError —
            # confirm the type Algolia returns here.
            data = getAlgolia(url, 'filters=actor_id=' + actorLink['actor_id'], PAsearchSites.getSearchBaseURL(siteID))
            actorData = data['hits'][0]
            if actorData['pictures']:
                max_quality = sorted(actorData['pictures'].keys())[-1]
                actorPhotoURL = 'https://images-fame.gammacdn.com/actors' + actorData['pictures'][max_quality]
            else:
                actorPhotoURL = ''
            movieActors.addActor(actorName, actorPhotoURL)

        # Posters — CDN front cover plus third-largest picture variant(s)
        art = [
            'https://images-fame.gammacdn.com/movies/{0}/{0}_{1}_front_400x625.jpg'
            .format(detailsPageElements['movie_id'],
                    detailsPageElements['url_title'].lower().replace('-', '_'))
        ]
        if 'pictures' in detailsPageElements:
            max_quality = sorted(detailsPageElements['pictures'].keys())[-3]
            art.append('https://images-fame.gammacdn.com/movies/' + detailsPageElements['pictures'][max_quality])
        else:
            for idx, scene in scenesPagesElements:
                max_quality = sorted(scene['pictures'].keys())[-3]
                art.append('https://images-fame.gammacdn.com/movies/' + scene['pictures'][max_quality])
    else:
        # --- Direct DVD-page scrape path ---
        sceneURL = metadata_id[0].replace('_', '/').replace('!', '?')
        data = urllib.urlopen(sceneURL).read()
        detailsPageElements = HTML.ElementFromString(data)

        # Studio
        metadata.studio = detailsPageElements.xpath('//div[@class="studio"]//a/text()')[0]

        # Title
        metadata.title = detailsPageElements.xpath('//h1[@class="description"]/text()')[0]

        # Summary
        metadata.summary = detailsPageElements.xpath('//div[@class="synopsis"]')[0].text_content().strip()

        # Release Date
        date = detailsPageElements.xpath('//div[@class="release-date"]/text()')[0].strip()
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

        # Tagline and Collection(s)
        metadata.collections.clear()
        metadata.collections.add(metadata.studio)

        # Genres
        movieGenres.clearGenres()
        genres = detailsPageElements.xpath('//div[@class="categories"]//a')
        for genreLink in genres:
            genreName = genreLink.xpath('./text()')[0]
            movieGenres.addGenre(genreName)

        # Actors — skip the site's generic placeholder headshot
        movieActors.clearActors()
        actors = detailsPageElements.xpath('//div[@class="video-performer"]//img')
        for actorLink in actors:
            actorName = actorLink.xpath('./@title')[0]
            actorPhotoURL = actorLink.xpath('./@data-bgsrc')[0]
            if 'image-not-available-performer-female' in actorPhotoURL:
                actorPhotoURL = ''
            movieActors.addActor(actorName, actorPhotoURL)

        # Posters — main picture plus any 'img = "..."' URLs in inline scripts
        art = [detailsPageElements.xpath('//picture//img/@src')[-1]]
        images = re.findall(r'img = \"(.*?)\";', data)
        for image in images:
            if image not in art:
                art.append(image)

    # Shared artwork download/classification loop for both paths
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis; failures skip the image
            try:
                img_file = urllib.urlopen(posterUrl)
                im = StringIO(img_file.read())
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(HTTP.Request(posterUrl, headers={'Referer': 'http://www.google.com'}).content, sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item (landscape)
                    metadata.art[posterUrl] = Proxy.Media(HTTP.Request(posterUrl, headers={'Referer': 'http://www.google.com'}).content, sort_order=idx)
            except:
                pass
    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Populate Plex metadata from a token-authenticated releases API.

    metadata.id is 'sceneID|siteID|sceneType'. An 'Instance' token from
    get_Cookies() authorizes /v2/releases and /v1/actors requests. Fills all
    metadata fields on the passed-in objects and returns `metadata`.
    """
    Log('******UPDATE CALLED*******')
    metadata_id = str(metadata.id).split('|')
    sceneID = metadata_id[0]
    sceneType = metadata_id[2]
    cookies = get_Cookies(PAsearchSites.getSearchBaseURL(siteID))
    headers = {
        'Instance': cookies['instance_token'],
    }
    url = PAsearchSites.getSearchSearchURL(siteID) + '/v2/releases?type=%s&id=%s' % (sceneType, sceneID)
    # NOTE(review): stdlib Python 2 urllib has no Request class (that is
    # urllib2.Request) — this only works if the runtime aliases urllib to a
    # module providing Request/urlopen(Request). Confirm against the plugin's
    # import block.
    req = urllib.Request(url, headers=headers)
    data = urllib.urlopen(req).read()
    detailsPageElements = json.loads(data)['result'][0]

    # Studio — brand is lowercase in the API, title-case it
    metadata.studio = detailsPageElements['brand'].title()

    # Title
    metadata.title = detailsPageElements['title']

    # Summary — own description first, else the parent release's
    description = None
    if 'description' in detailsPageElements:
        description = detailsPageElements['description']
    elif 'parent' in detailsPageElements:
        if 'description' in detailsPageElements['parent']:
            description = detailsPageElements['parent']['description']
    if description:
        metadata.summary = description

    # Release Date
    date_object = parse(detailsPageElements['dateReleased'])
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Tagline and Collection(s) — API collections, parent title, and the
    # site's own name (prepended unless already present, compared with
    # spaces/apostrophes stripped, case-insensitively).
    metadata.collections.clear()
    seriesNames = []
    if 'collections' in detailsPageElements and detailsPageElements['collections']:
        for collection in detailsPageElements['collections']:
            seriesNames.append(collection['name'])
    if 'parent' in detailsPageElements:
        if 'title' in detailsPageElements['parent']:
            seriesNames.append(detailsPageElements['parent']['title'])
    isInCollection = False
    siteName = PAsearchSites.getSearchSiteName(siteID).lower().replace(' ', '').replace('\'', '')
    for seriesName in seriesNames:
        if seriesName.lower().replace(' ', '').replace('\'', '') == siteName:
            isInCollection = True
            break
    if not isInCollection:
        seriesNames.insert(0, PAsearchSites.getSearchSiteName(siteID))
    for seriesName in seriesNames:
        metadata.collections.add(seriesName)

    # Genres
    movieGenres.clearGenres()
    genres = detailsPageElements['tags']
    for genreLink in genres:
        genreName = genreLink['name']
        movieGenres.addGenre(genreName)

    # Actors — one /v1/actors call per performer for name + 'xs' headshot
    movieActors.clearActors()
    actors = detailsPageElements['actors']
    for actorLink in actors:
        actorPageURL = PAsearchSites.getSearchSearchURL(siteID) + '/v1/actors?id=%d' % actorLink['id']
        req = urllib.Request(actorPageURL, headers=headers)
        data = urllib.urlopen(req).read()
        actorData = json.loads(data)['result'][0]
        actorName = actorData['name']
        actorPhotoURL = ''
        if actorData['images'] and actorData['images']['profile']:
            actorPhotoURL = actorData['images']['profile'][0]['xs']['url']
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters — 'xx' size of every poster and cover image
    art = []
    for imageType in ['poster', 'cover']:
        if imageType in detailsPageElements['images']:
            for image in detailsPageElements['images'][imageType]:
                art.append(image['xx']['url'])
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis; failures skip the image
            try:
                img_file = urllib.urlopen(posterUrl)
                im = StringIO(img_file.read())
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(HTTP.Request(posterUrl, headers={'Referer': 'http://www.google.com'}).content, sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item (landscape)
                    metadata.art[posterUrl] = Proxy.Media(HTTP.Request(posterUrl, headers={'Referer': 'http://www.google.com'}).content, sort_order=idx)
            except:
                pass
    return metadata
def search(results, encodedTitle, title, searchTitle, siteNum, lang, searchByDateActor, searchDate, searchSiteID):
    """Search Girlfriends Films: Algolia scenes + movies, then the DVD shop.

    A leading numeric token in searchTitle is treated as a scene/movie id and
    stripped. Algolia hits get ids 'curID|siteNum|sceneType|releaseDate';
    DVD-shop hits get encoded-URL ids 'curID|siteNum' (no type segment),
    which the update() counterpart uses to pick its path. Returns `results`.
    """
    if searchSiteID != 9999:
        siteNum = searchSiteID
    sceneID = searchTitle.split(' ', 1)[0]
    # Python 2: numeric leading token means the user supplied an id
    if unicode(sceneID, 'utf8').isdigit():
        searchTitle = searchTitle.replace(sceneID, '', 1).strip()
    else:
        sceneID = None
    apiKEY = getAPIKey(PAsearchSites.getSearchBaseURL(siteNum))
    for sceneType in ['scenes', 'movies']:
        # The '*' placeholder in the search URL selects the Algolia index
        url = PAsearchSites.getSearchSearchURL(siteNum).replace('*', 'girlfriendsfilms_' + sceneType, 1) + '?x-algolia-application-id=TSMKFA364Q&x-algolia-api-key=' + apiKEY
        data = getAlgolia(url, 'query=' + searchTitle, PAsearchSites.getSearchBaseURL(siteNum))
        searchResults = data['hits']
        for searchResult in searchResults:
            if sceneType == 'scenes':
                releaseDate = parse(searchResult['release_date'])
                actors = []
                for actorLink in searchResult['female_actors']:
                    actors.append(actorLink['name'])
                sceneData = ', '.join(actors)
                curID = searchResult['clip_id']
                titleNoFormatting = '%s %s' % (searchResult['title'], sceneData)
            else:
                # Movies: prefer last_modified, else date_created
                date = 'last_modified' if searchResult['last_modified'] else 'date_created'
                releaseDate = parse(searchResult[date])
                curID = searchResult['movie_id']
                titleNoFormatting = searchResult['title']
                # Re-releases carry the original date in the description
                description = searchResult['description']
                if description.startswith('Previously released on'):
                    date = description.split('.', 1)[0].replace('Previously released on', '', 1).strip()
                    releaseDate = parse(date)
            # The file's own date wins when it predates the API date
            if searchDate:
                date = parse(searchDate)
                if date.year < releaseDate.year:
                    releaseDate = date
            releaseDate = releaseDate.strftime('%Y-%m-%d')
            if sceneID:
                # NOTE(review): curID is the raw clip_id/movie_id from Algolia
                # (formatted with %d below, so presumably an int) while sceneID
                # is a str — confirm LevenshteinDistance accepts that mix.
                score = 100 - Util.LevenshteinDistance(sceneID, curID)
            else:
                score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
            results.Append(MetadataSearchResult(id='%d|%d|%s|%s' % (curID, siteNum, sceneType, releaseDate), name='[%s] %s %s' % (sceneType.capitalize(), titleNoFormatting, releaseDate), score=score, lang=lang))

    # DVD shop search (plain HTML); ids are encoded URLs with no type segment
    searchResults = HTML.ElementFromURL('https://www.girlfriendsfilms.net/Search?media=2&q=' + encodedTitle)
    for searchResult in searchResults.xpath('//div[@class="grid-item"]'):
        titleNoFormatting = searchResult.xpath('.//span[@class="overlay-inner"]//text()')[0]
        sceneURL = 'https://www.girlfriendsfilms.net' + searchResult.xpath('.//a/@href')[0]
        curID = sceneURL.replace('/', '_').replace('?', '!')
        score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
        results.Append(MetadataSearchResult(id='%s|%d' % (curID, siteNum), name='[DVD] %s' % (titleNoFormatting), score=score, lang=lang))
    return results
def search(results, encodedTitle, title, searchTitle, siteNum, lang, searchByDateActor, searchDate, searchSiteID):
    """Search a Gamma-network TLC-style site (XEmpire, Blowpass, Girlsway, ...)
    for scenes and DVDs, appending MetadataSearchResult entries.

    Fixes vs. previous revision:
    - `network` was referenced before assignment (NameError) when `siteNum`
      matched none of the branches; it is now initialised to ''.
    - In the DVD loop, `releaseDate.strftime('%Y')` raised on the '' fallback,
      and the surrounding bare except silently dropped all remaining DVD
      results; the year is now only formatted when a date was actually parsed.
    """
    if searchSiteID != 9999:
        siteNum = searchSiteID

    # Map the site number to its parent-network display name.
    # NOTE: 466 appears in both the 21Sextury and 21Sextreme ranges; the
    # first matching branch (21Sextury) wins, preserving prior behavior.
    network = ''
    if siteNum == 278 or (siteNum >= 285 and siteNum <= 287):
        network = 'XEmpire'
    elif siteNum == 329 or (siteNum >= 351 and siteNum <= 354):
        network = 'Blowpass'
    elif siteNum == 331 or (siteNum >= 355 and siteNum <= 360):
        network = 'Fantasy Massage'
    elif siteNum == 330 or siteNum == 332 or (siteNum >= 361 and siteNum <= 364):
        network = 'Mile High Network'
    elif (siteNum >= 365 and siteNum <= 372) or siteNum == 466:
        network = '21Sextury'
    elif siteNum == 183 or (siteNum >= 373 and siteNum <= 374):
        network = '21Naturals'
    elif siteNum == 53 or (siteNum >= 375 and siteNum <= 379):
        network = 'Girlsway'
    elif siteNum >= 383 and siteNum <= 386:
        network = 'Fame Digital'
    elif siteNum >= 387 and siteNum <= 392:
        network = 'Open Life Network'
    elif siteNum == 281:
        network = 'Pure Taboo'
    elif siteNum == 380:
        network = 'Girlfriends Films'
    elif siteNum == 381:
        network = 'Burning Angel'
    elif siteNum == 277:
        network = 'Evil Angel'
    elif siteNum == 382:
        network = 'Pretty Dirty'
    elif siteNum >= 460 and siteNum <= 466:
        network = '21Sextreme'

    # Hide the network prefix when it is unknown or identical to the site name.
    if not network or network == PAsearchSites.getSearchSiteName(siteNum):
        network = ''
    else:
        network = network + "/"

    # Scene results from the site's search page.
    searchResults = HTML.ElementFromURL(
        PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + "?query=" +
        encodedTitle)
    for searchResult in searchResults.xpath('//div[@class="tlcDetails"]'):
        titleNoFormatting = searchResult.xpath(
            './/a[1]')[0].text_content().strip()
        curID = searchResult.xpath('.//a[1]')[0].get('href').replace(
            '/', '_').replace('?', '!')
        # Prefer the date shown on the listing; fall back to the detail page.
        try:
            releaseDate = parse(
                searchResult.xpath(
                    './/div[@class="tlcSpecs"]/span[@class="tlcSpecsDate"]/span[@class="tlcDetailsValue"]'
                )[0].text_content().strip()).strftime('%Y-%m-%d')
        except:
            try:
                detailsPageElements = HTML.ElementFromURL(
                    PAsearchSites.getSearchBaseURL(siteNum) +
                    searchResult.xpath('.//a[1]')[0].get('href'))
                releaseDate = parse(
                    detailsPageElements.xpath('//*[@class="updatedDate"]')
                    [0].text_content().strip()).strftime('%Y-%m-%d')
            except:
                releaseDate = ''
        if searchDate and releaseDate:
            score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
        else:
            score = 100 - Util.LevenshteinDistance(searchTitle.lower(),
                                                   titleNoFormatting.lower())
        results.Append(
            MetadataSearchResult(id=curID + "|" + str(siteNum),
                                 name=titleNoFormatting + " [" + network +
                                 PAsearchSites.getSearchSiteName(siteNum) +
                                 "] " + releaseDate,
                                 score=score,
                                 lang=lang))

    # DVD (full movie) results; best-effort, so failures are swallowed.
    try:
        dvdResults = HTML.ElementFromURL(
            PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + "/dvd")
        for dvdResult in dvdResults.xpath(
                '//div[contains(@class,"tlcItem playlistable_dvds")] | //div[@class="tlcDetails"]'
        ):
            titleNoFormatting = dvdResult.xpath(
                './/div[@class="tlcTitle"]/a')[0].get('title').strip()
            curID = dvdResult.xpath('.//a')[0].get('href').replace(
                '/', '_').replace('?', '!')
            try:
                releaseDate = parse(
                    dvdResult.xpath(
                        './/div[@class="tlcSpecs"]/span[@class="tlcSpecsDate"]/span[@class="tlcDetailsValue"]'
                    )[0].text_content().strip())
            except:
                try:
                    detailsPageElements = HTML.ElementFromURL(
                        PAsearchSites.getSearchBaseURL(siteNum) +
                        dvdResult.xpath('.//a[1]')[0].get('href'))
                    releaseDate = parse(
                        detailsPageElements.xpath('//*[@class="updatedDate"]')
                        [0].text_content().strip())
                except:
                    releaseDate = ''
            # Only format a year when a datetime was actually obtained;
            # '' has no strftime and previously killed the whole loop.
            dvdYear = releaseDate.strftime('%Y') if releaseDate else ''
            score = 100 - Util.LevenshteinDistance(searchTitle.lower(),
                                                   titleNoFormatting.lower())
            results.Append(
                MetadataSearchResult(
                    id=curID + "|" + str(siteNum),
                    name=titleNoFormatting + " (" + dvdYear +
                    ") - Full Movie [" +
                    PAsearchSites.getSearchSiteName(siteNum) + "]",
                    score=score,
                    lang=lang))
    except:
        pass
    return results
def update(metadata, siteID, movieGenres, movieActors):
    """Populate metadata for a Jules Jordan network title by scraping the
    scene/movie detail page, the search page slideshow, and the Photos/Caps
    pages for artwork.

    Fixes vs. previous revision:
    - Slideshow loop built the attribute name with `'src' + i + '_1x'`
      (str + int TypeError, silently swallowed) so those images never loaded;
      now uses str(i).
    - Random artwork sampling used `random.randint(1, imageCount)` as a list
      index, which is out of range when randint returns imageCount (valid
      indices are 0..imageCount-1) and skipped index 0; now uses
      random.choice over the collected list, guarded against empty lists.
    """
    Log('******UPDATE CALLED*******')
    # metadata.id encodes the page URL with '/'->'_' and '?'->'!' substitutions.
    url = str(metadata.id).split("|")[0].replace('_', '/').replace(
        '?', '!').replace('/vids.html', '_vids.html')
    detailsPageElements = HTML.ElementFromURL(url)
    art = []
    metadata.collections.clear()
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Studio
    metadata.studio = 'Jules Jordan'

    # Title
    metadata.title = detailsPageElements.xpath(
        '//span[@class="title_bar_hilite"]')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//span[@class="update_description"]')[0].text_content().strip()

    # Tagline and Collection(s)
    tagline = PAsearchSites.getSearchSiteName(siteID).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)
    # Some scenes also belong to a DVD; add it as a collection when present.
    try:
        dvdName = detailsPageElements.xpath(
            '//span[@class="update_dvds"]')[0].text_content().replace(
                'Movie:', '').strip()
        metadata.collections.add(dvdName)
    except:
        pass

    # Genres
    genres = detailsPageElements.xpath('//span[@class="update_tags"]/a')
    for genreLink in genres:
        genreName = genreLink.text_content().strip('\n').lower()
        movieGenres.addGenre(genreName)

    # Release Date — when the visible cell is empty the date is hidden in an
    # HTML comment shaped like "OFF <date> D...".
    date = detailsPageElements.xpath(
        '//div[@class="cell update_date"]')[0].text_content().strip()
    if date == '':
        try:
            date = str(
                detailsPageElements.xpath(
                    './/div[@class="cell update_date"]/comment()')[0]).strip()
            date = date[date.find('OFF') + 4:date.find('D', date.find('OFF') + 4)].strip()
        except:
            pass
    if len(date) > 0:
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors — GirlGirl uses a different page layout.
    if PAsearchSites.getSearchSiteName(siteID) == "GirlGirl":
        actors = detailsPageElements.xpath('//div[@class="item"]/span/div/a')
    else:
        actors = detailsPageElements.xpath(
            '//div[@class="backgroundcolor_info"]/span[@class="update_models"]/a'
        )
    for actorLink in actors:
        actorName = str(actorLink.text_content().strip())
        actorPageURL = actorLink.get("href")
        actorPage = HTML.ElementFromURL(actorPageURL)
        try:
            actorPhotoURL = actorPage.xpath(
                '//img[@class="model_bio_thumb stdimage thumbs target"]'
            )[0].get("src0_3x")
            if 'http' not in actorPhotoURL:
                actorPhotoURL = PAsearchSites.getSearchBaseURL(
                    siteID) + actorPhotoURL
        except:
            actorPhotoURL = ''
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    # Video trailer background image, embedded in the df_movie script blob.
    try:
        bigScript = detailsPageElements.xpath(
            '//script[contains(text(),"df_movie")]')[0].text_content()
        alpha = bigScript.find('useimage = "') + 12
        omega = bigScript.find('";', alpha)
        background = bigScript[alpha:omega]
        if 'http' not in background:
            background = PAsearchSites.getSearchBaseURL(siteID) + background
        Log("background: " + background)
        art.append(background)
    except:
        pass

    # Slideshow of images from the Search page, keyed by the script's setid.
    try:
        bigScript = detailsPageElements.xpath(
            '//script[contains(text(),"df_movie")]')[0].text_content()
        alpha = bigScript.find('setid:"') + 7
        omega = bigScript.find('",', alpha)
        setID = bigScript[alpha:omega]
        Log("setID: " + setID)
        searchPageElements = HTML.ElementFromURL(
            (PAsearchSites.getSearchSearchURL(siteID) +
             metadata.title).replace(' ', '%20'))
        posterUrl = searchPageElements.xpath('//img[@id="set-target-' + setID +
                                             '"]')[0].get('src')
        if 'http' not in posterUrl:
            posterUrl = PAsearchSites.getSearchBaseURL(siteID) + posterUrl
        Log("slideshow: " + posterUrl)
        art.append(posterUrl)
        # Additional slideshow frames live in src0_1x .. src6_1x attributes.
        for i in range(0, 7):
            try:
                posterUrl = searchPageElements.xpath(
                    '//img[@id="set-target-' + setID +
                    '"]')[0].get('src' + str(i) + '_1x')
                if 'http' not in posterUrl:
                    posterUrl = PAsearchSites.getSearchBaseURL(
                        siteID) + posterUrl
                Log("slideshow: " + posterUrl)
                art.append(posterUrl)
            except:
                pass
    except:
        pass

    # Photos page — parse the 1600px image URLs out of the inline ptx script.
    try:
        photoPageURL = detailsPageElements.xpath(
            '//div[@class="cell content_tab"]/a[text()="Photos"]')[0].get(
                'href')
        photoPageElements = HTML.ElementFromURL(photoPageURL)
        bigScript = photoPageElements.xpath(
            '//script[contains(text(),"var ptx")]')[0].text_content()
        ptx1600starts = bigScript.find('1600')
        ptx1600ends = bigScript.find('togglestatus', ptx1600starts)
        ptx1600 = bigScript[ptx1600starts:ptx1600ends]
        photos = []
        alpha = 0
        omega = 0
        imageCount = ptx1600.count('ptx["1600"][')
        Log("Photos found: " + str(imageCount))
        for _ in range(imageCount):
            alpha = ptx1600.find('{src: "', omega) + 7
            omega = ptx1600.find('"', alpha)
            posterUrl = ptx1600[alpha:omega]
            if 'http' not in posterUrl:
                posterUrl = PAsearchSites.getSearchBaseURL(siteID) + posterUrl
            photos.append(posterUrl)
        if photos:
            # Sample 10 photos (with replacement) from the gallery.
            for x in range(10):
                art.append(random.choice(photos))
    except:
        pass

    # Vidcaps page — same ptx-script parsing, "jpg" bucket.
    try:
        capsPageURL = detailsPageElements.xpath(
            '//div[@class="cell content_tab"]/a[text()="Caps"]')[0].get('href')
        capsPageElements = HTML.ElementFromURL(capsPageURL)
        bigScript = capsPageElements.xpath(
            '//script[contains(text(),"var ptx")]')[0].text_content()
        ptxjpgstarts = bigScript.find('ptx["jpg"] = {};')
        ptxjpgends = bigScript.find('togglestatus', ptxjpgstarts)
        ptxjpg = bigScript[ptxjpgstarts:ptxjpgends]
        vidcaps = []
        alpha = 0
        omega = 0
        imageCount = ptxjpg.count('ptx["jpg"][')
        Log("Vidcaps found: " + str(imageCount))
        for _ in range(imageCount):
            alpha = ptxjpg.find('{src: "', omega) + 7
            omega = ptxjpg.find('"', alpha)
            posterUrl = ptxjpg[alpha:omega]
            if 'http' not in posterUrl:
                posterUrl = PAsearchSites.getSearchBaseURL(siteID) + posterUrl
            vidcaps.append(posterUrl)
        if vidcaps:
            for x in range(10):
                art.append(random.choice(vidcaps))
    except:
        pass

    j = 1
    Log("Artwork found: " + str(len(art)))
    for posterUrl in art:
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                img_file = urllib.urlopen(posterUrl)
                im = StringIO(img_file.read())
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1 or height > width:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Preview(HTTP.Request(
                        posterUrl,
                        headers={
                            'Referer': 'http://www.google.com'
                        }).content, sort_order=j)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Preview(HTTP.Request(
                        posterUrl,
                        headers={
                            'Referer': 'http://www.google.com'
                        }).content, sort_order=j)
                j = j + 1
            except:
                pass
    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate metadata for a Gamma-network scene or movie from the Algolia
    API. The metadata id is 'sceneID|siteNum|sceneType|releaseDate'.
    """
    metadata_id = str(metadata.id).split('|')
    sceneID = int(metadata_id[0])
    sceneType = metadata_id[2]
    # Scenes are keyed by clip_id, movies by movie_id in the API.
    sceneIDName = 'clip_id' if sceneType == 'scenes' else 'movie_id'
    sceneDate = metadata_id[3]

    apiKEY = getAPIKey(siteNum)
    url = PAsearchSites.getSearchSearchURL(
        siteNum
    ) + '?x-algolia-application-id=TSMKFA364Q&x-algolia-api-key=' + apiKEY
    data = getAlgolia(url, 'all_' + sceneType,
                      'filters=%s=%d' % (sceneIDName, sceneID),
                      PAsearchSites.getSearchBaseURL(siteNum))
    detailsPageElements = data[0]
    # Fetch sibling scenes of the same title to number this scene within it.
    data = getAlgolia(url, 'all_scenes',
                      'query=%s' % detailsPageElements['url_title'],
                      PAsearchSites.getSearchBaseURL(siteNum))
    data = sorted(data, key=lambda i: i['clip_id'])
    scenesPagesElements = list(enumerate(data, 1))

    # Title — append ", Scene N" when this clip is one of several.
    title = None
    if sceneType == 'scenes' and len(scenesPagesElements) > 1:
        for idx, scene in scenesPagesElements:
            if scene['clip_id'] == sceneID:
                title = '%s, Scene %d' % (detailsPageElements['title'], idx)
                break
    if not title:
        title = detailsPageElements['title']
    metadata.title = title

    # Summary — strip the API's literal <br> markup.
    metadata.summary = detailsPageElements['description'].replace(
        '</br>', '\n').replace('<br>', '\n')

    # Studio
    metadata.studio = detailsPageElements['network_name']

    # Tagline and Collection(s)
    metadata.collections.clear()
    for collectionName in ['studio_name', 'serie_name']:
        if collectionName in detailsPageElements:
            metadata.collections.add(detailsPageElements[collectionName])
    if (':' in detailsPageElements['title']
            or '#' in detailsPageElements['title']) and len(scenesPagesElements) > 1:
        if 'movie_title' in detailsPageElements:
            metadata.collections.add(detailsPageElements['movie_title'])

    # Release Date — taken from the encoded metadata id, not the API record.
    date_object = parse(sceneDate)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres — for movies, merge the categories of every contained scene.
    movieGenres.clearGenres()
    for genreLink in detailsPageElements['categories']:
        genreName = genreLink['name']
        if genreName:
            movieGenres.addGenre(genreName)
    if sceneType == 'movies':
        for idx, scene in scenesPagesElements:
            for genreLink in scene['categories']:
                genreName = genreLink['name']
                if genreName:
                    movieGenres.addGenre(genreName)

    # Actors — female performers are listed before male performers.
    movieActors.clearActors()
    female = []
    male = []
    for actorLink in detailsPageElements['actors']:
        actorName = actorLink['name']
        # NOTE(review): string concatenation assumes actor_id is a str in the
        # API payload — TODO confirm against a live response.
        actorData = getAlgolia(url, 'all_actors',
                               'filters=actor_id=' + actorLink['actor_id'],
                               PAsearchSites.getSearchBaseURL(siteNum))[0]
        if 'pictures' in actorData and actorData['pictures']:
            # Keys sort lexicographically; last key is taken as best quality.
            max_quality = sorted(actorData['pictures'].keys())[-1]
            actorPhotoURL = 'https://images-fame.gammacdn.com/actors' + actorData[
                'pictures'][max_quality]
        else:
            actorPhotoURL = ''
        if actorLink['gender'] == 'female':
            female.append((actorName, actorPhotoURL))
        else:
            male.append((actorName, actorPhotoURL))
    combined = female + male
    for actor in combined:
        movieActors.addActor(actor[0], actor[1])

    # Posters — CDN cover URLs are derived from movie id + slug.
    art = []
    if not PAsearchSites.getSearchBaseURL(siteNum).endswith(
            ('girlsway.com', 'puretaboo.com')):
        art.append(
            'https://images-fame.gammacdn.com/movies/{0}/{0}_{1}_front_400x625.jpg'
            .format(detailsPageElements['movie_id'],
                    detailsPageElements['url_title'].lower().replace('-', '_')))
        if 'url_movie_title' in detailsPageElements:
            art.append(
                'https://images-fame.gammacdn.com/movies/{0}/{0}_{1}_front_400x625.jpg'
                .format(
                    detailsPageElements['movie_id'],
                    detailsPageElements['url_movie_title'].lower().replace(
                        '-', '_')))
    if 'pictures' in detailsPageElements and detailsPageElements['pictures']:
        # NOTE(review): the key comes from pictures['nsfw']['top'] but the
        # lookup below is on pictures[max_quality] at the top level — these
        # paths look inconsistent; verify against a live API payload.
        # (.keys()[0] is also Python-2-only.)
        max_quality = detailsPageElements['pictures']['nsfw']['top'].keys()[0]
        pictureURL = 'https://images-fame.gammacdn.com/movies/' + detailsPageElements[
            'pictures'][max_quality]
        if sceneType == 'movies':
            art.append(pictureURL)
        else:
            art.insert(0, pictureURL)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size

                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass
    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate metadata for a Mylf-network scene from the site's embedded
    JSON. The metadata id is 'sceneName|siteNum|releaseDate|contentName'.
    """
    metadata_id = str(metadata.id).split('|')
    sceneName = metadata_id[0]
    releaseDate = metadata_id[2]
    contentName = metadata_id[3]

    detailsPageElements = getJSONfromPage(
        PAsearchSites.getSearchSearchURL(siteNum) +
        sceneName)[contentName][sceneName]

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    metadata.summary = detailsPageElements['description']

    # Studio
    metadata.studio = 'Mylf'

    # Tagline and Collection(s) — prefer the sub-site name from the JSON.
    metadata.collections.clear()
    if 'site' in detailsPageElements:
        subSite = detailsPageElements['site']['name']
    else:
        subSite = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = subSite
    if Prefs['collections_addsitename']:
        metadata.collections.add(subSite)

    # Release Date — comes from the encoded metadata id.
    if releaseDate:
        date_object = parse(releaseDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors — each model's photo is fetched from their profile page JSON.
    movieActors.clearActors()
    actors = detailsPageElements['models']
    for actorLink in actors:
        actorID = actorLink['modelId']
        actorName = actorLink['modelName']
        actorPhotoURL = ''
        actorData = getJSONfromPage(
            '%s/models/%s' %
            (PAsearchSites.getSearchBaseURL(siteNum), actorID))
        if actorData:
            actorPhotoURL = actorData['modelsContent'][actorID]['img']
        movieActors.addActor(actorName, actorPhotoURL)

    # Genres — base genres plus sub-site-specific genres added immediately.
    movieGenres.clearGenres()
    genres = ['MILF', 'Mature']
    if subSite.lower() == 'MylfBoss'.lower():
        for genreName in ['Office', 'Boss']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == 'MylfBlows'.lower():
        for genreName in ['B*****b']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == 'Milfty'.lower():
        for genreName in ['Cheating']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == 'Mom Drips'.lower():
        for genreName in ['Creampie']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == 'Milf Body'.lower():
        for genreName in ['Gym', 'Fitness']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == 'Lone Milf'.lower():
        for genreName in ['Solo']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == 'Full Of JOI'.lower():
        for genreName in ['JOI']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == 'Mylfed'.lower():
        for genreName in ['Lesbian', 'Girl on Girl', 'GG']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == 'MylfDom'.lower():
        for genreName in ['BDSM']:
            movieGenres.addGenre(genreName)
    # Multiple performers implies a threesome, except on the all-girl site.
    if (len(actors) > 1) and subSite != 'Mylfed':
        genres.append('Threesome')
    for genreLink in genres:
        genreName = genreLink
        movieGenres.addGenre(genreName)

    # Posters — the scene JSON exposes a single image.
    art = [detailsPageElements['img']]
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size

                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass
    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate metadata for a Score Group scene by scraping its detail page.
    The metadata id is 'encodedSceneURL|siteNum'.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchSearchURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//h1')[0].text_content().strip()

    # Summary
    # NOTE(review): the inner break only exits the inner loop, so the LAST
    # xpath with a match wins, not the first — looks unintended; confirm
    # before changing, as the two selectors usually match the same node.
    summary_xpaths = [
        '//div[@class="p-desc"]',
        '//div[contains(@class, "desc")]'
    ]
    for xpath in summary_xpaths:
        for summary in detailsPageElements.xpath(xpath):
            metadata.summary = summary.text_content().replace(
                'Read More »', '').strip()
            break

    # Studio
    metadata.studio = 'Score Group'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    if Prefs['collections_addsitename']:
        metadata.collections.add(metadata.tagline)

    # Release Date — assumes the second "value" span holds the date; the
    # first presumably holds something else (e.g. duration) — TODO confirm.
    date = detailsPageElements.xpath('//div/span[@class="value"]')
    if date:
        date = date[1].text_content().strip()
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors — no photos are available from this layout.
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//div/span[@class="value"]/a'):
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''
        movieActors.addActor(actorName, actorPhotoURL)
    # Site 1344 never lists its sole performer; add her explicitly.
    if siteNum == 1344:
        movieActors.addActor('Christy Marks', '')

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="mb-3"]/a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Posters/Background — player poster from inline JS, then gallery images.
    art = []
    match = re.search(r'posterImage: \'(.*)\'', req.text)
    if match:
        art.append(match.group(1))

    xpaths = [
        '//div[contains(@class, "thumb")]/img/@src',
        '//div[contains(@class, "p-image")]/a/img/@src',
        '//div[contains(@class, "dl-opts")]/a/img/@src',
        '//div[contains(@class, "p-photos")]/div/div/a/@href',
        '//div[contains(@class, "gallery")]/div/div/a/@href'
    ]
    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            # Protocol-relative URLs are normalised to http.
            if not poster.startswith('http'):
                poster = 'http:' + poster
            # 'shared-bits' images are site chrome, not scene artwork.
            if 'shared-bits' not in poster:
                art.append(poster)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size

                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass
    return metadata
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Search a data18-style movie index via the site's search page and a
    Google fallback, appending MetadataSearchResult entries. Results that tie
    at the generic score of 80 are demoted to 79 when more than one exists.

    Fix vs. previous revision: the details-page release date was read with a
    doubled `.text_content().text_content()` — the second call was made on a
    str, raising AttributeError which the except silently turned into
    `date = ''`, so the real release date was never used.
    """
    searchResults = []
    siteResults = []
    temp = []
    count = 0

    # A leading numeric token (> 100) is treated as an explicit movie ID.
    # NOTE: unicode(...) makes this Python-2-only code.
    sceneID = None
    splited = searchTitle.split(' ')
    if unicode(splited[0], 'UTF-8').isdigit():
        sceneID = splited[0]
        if int(sceneID) > 100:
            searchTitle = searchTitle.replace(sceneID, '', 1).strip()
            movieURL = '%s/movies/%s' % (
                PAsearchSites.getSearchBaseURL(siteNum), sceneID)
            searchResults.append(movieURL)

    encodedTitle = searchTitle.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum),
                          encodedTitle)
    req = PAutils.HTTPRequest(searchURL,
                              headers={'Referer': 'http://www.data18.com'})
    searchPageElements = HTML.ElementFromString(req.text)

    # Pass 1: results from the site's own search page.
    for searchResult in searchPageElements.xpath(
            '//a[contains(@href, "movies")]//parent::div[contains(@style, "float: left; padding")]'
    ):
        movieURL = searchResult.xpath('.//*[img]/@href')[0]
        urlID = re.sub(r'.*/', '', movieURL)
        if movieURL not in searchResults:
            titleNoFormatting = PAutils.parseTitle(
                searchResult.xpath('.//*[contains(@href, "movies")]')
                [1].text_content(), siteNum)
            curID = PAutils.Encode(movieURL)
            siteResults.append(movieURL)

            date = searchResult.text
            if date and not date == 'unknown':
                try:
                    releaseDate = datetime.strptime(
                        date, '%Y%m%d').strftime('%Y-%m-%d')
                except:
                    releaseDate = ''
            else:
                releaseDate = parse(searchDate).strftime(
                    '%Y-%m-%d') if searchDate else ''
            # Only show a date in the name if the page itself provided one.
            displayDate = releaseDate if date else ''

            if sceneID == urlID:
                score = 100
            elif searchDate and displayDate:
                score = 80 - Util.LevenshteinDistance(searchDate, releaseDate)
            else:
                score = 80 - Util.LevenshteinDistance(
                    searchTitle.lower(), titleNoFormatting.lower())

            # Perfect-80 ties are held back for possible demotion below.
            if score == 80:
                count += 1
                temp.append(
                    MetadataSearchResult(
                        id='%s|%d|%s' % (curID, siteNum, releaseDate),
                        name='%s %s' % (titleNoFormatting, displayDate),
                        score=score,
                        lang=lang))
            else:
                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s' % (curID, siteNum, releaseDate),
                        name='%s %s' % (titleNoFormatting, displayDate),
                        score=score,
                        lang=lang))

    # Pass 2: movie URLs discovered via Google that the site search missed.
    googleResults = PAutils.getFromGoogleSearch(searchTitle, siteNum)
    for movieURL in googleResults:
        if ('/movies/' in movieURL and '.html' not in movieURL
                and movieURL not in searchResults
                and movieURL not in siteResults):
            searchResults.append(movieURL)

    for movieURL in searchResults:
        req = PAutils.HTTPRequest(movieURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', movieURL)

        # Site name: try Network, then Studio, then Site labels.
        try:
            siteName = detailsPageElements.xpath(
                '//i[contains(., "Network")]//preceding-sibling::a[1]'
            )[0].text_content().strip()
        except:
            try:
                siteName = detailsPageElements.xpath(
                    '//i[contains(., "Studio")]//preceding-sibling::a[1]'
                )[0].text_content().strip()
            except:
                try:
                    siteName = detailsPageElements.xpath(
                        '//i[contains(., "Site")]//preceding-sibling::a[1]'
                    )[0].text_content().strip()
                except:
                    siteName = ''

        titleNoFormatting = PAutils.parseTitle(
            detailsPageElements.xpath('//h1')[0].text_content(), siteNum)
        curID = PAutils.Encode(movieURL)

        try:
            # Fixed: a single text_content() call; the old duplicated call
            # raised on the returned str and always blanked the date.
            date = detailsPageElements.xpath(
                '//p[contains(., "Release")]')[0].text_content().split(
                    ':')[2].strip()
        except:
            date = ''
        if date and not date == 'unknown':
            releaseDate = parse(date).strftime('%Y-%m-%d')
        else:
            releaseDate = parse(searchDate).strftime(
                '%Y-%m-%d') if searchDate else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif searchDate and displayDate:
            score = 80 - Util.LevenshteinDistance(searchDate, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchTitle.lower(),
                                                  titleNoFormatting.lower())

        if score == 80:
            count += 1
            temp.append(
                MetadataSearchResult(
                    id='%s|%d|%s' % (curID, siteNum, releaseDate),
                    name='%s [%s] %s' % (titleNoFormatting, siteName,
                                         displayDate),
                    score=score,
                    lang=lang))
        else:
            results.Append(
                MetadataSearchResult(
                    id='%s|%d|%s' % (curID, siteNum, releaseDate),
                    name='%s [%s] %s' % (titleNoFormatting, siteName,
                                         displayDate),
                    score=score,
                    lang=lang))

    # Demote ambiguous 80-score ties so no single one looks authoritative.
    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=79,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=result.score,
                                     lang=lang))
    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate metadata for a Bang scene from the Bang search API.
    The metadata id starts with the scene identifier ('sceneID|...').
    """
    sceneID = str(metadata.id).split('|')[0]
    apiResponse = getDataFromAPI(PAsearchSites.getSearchSearchURL(siteNum),
                                 'identifier', sceneID)
    detailsPageElements = apiResponse['hits']['hits'][0]['_source']

    # Title / Summary / Studio straight from the API record.
    metadata.title = detailsPageElements['name']
    metadata.summary = detailsPageElements['description']
    metadata.studio = detailsPageElements['studio']['name'].title()

    # Collections: studio always, series when the record names one.
    metadata.collections.add(metadata.studio)
    seriesScene = detailsPageElements['series']['name']
    if seriesScene:
        metadata.collections.add(seriesScene.title())

    # Release Date (API uses ISO yyyy-mm-dd).
    date_object = datetime.strptime(detailsPageElements['releaseDate'],
                                    '%Y-%m-%d')
    metadata.originally_available_at = date_object
    metadata.year = date_object.year

    # Actors — headshots follow a fixed CDN URL scheme keyed by actor id.
    movieActors.clearActors()
    for performer in detailsPageElements['actors']:
        movieActors.addActor(
            performer['name'],
            'https://i.bang.com/pornstars/%d.jpg' % performer['id'])

    # Genres
    movieGenres.clearGenres()
    for tag in detailsPageElements['genres']:
        movieGenres.addGenre(tag['name'])

    # Posters: DVD front cover first, then every screenshot.
    dvdID = detailsPageElements['dvd']['id']
    art = ['https://i.bang.com/covers/%d/front.jpg' % dvdID]
    art += [
        'https://i.bang.com/screenshots/%d/movie/1/%d.jpg' %
        (dvdID, shot['screenId'])
        for shot in detailsPageElements['screenshots']
    ]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis; failures are best-effort.
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            width, height = Image.open(im).size

            # Add the image proxy items to the collection.
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass
    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Populate metadata for a JAV title by scraping its JavBus detail page.
    The metadata id starts with the encoded scene URL; `art` is a shared list
    that collected image URLs are appended to before download.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchSearchURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)
    # The trailing URL segment is the JAV release code (e.g. "ABC-123").
    JAVID = sceneURL.rsplit('/', 1)[1]

    # Studio
    javStudio = detailsPageElements.xpath(
        '//p/a[contains(@href, "/studio/")]')[0].text_content().strip()
    metadata.studio = javStudio

    # Title — prefix the studio when the ID is purely numeric, since the
    # page title alone would not identify the release.
    javTitle = detailsPageElements.xpath(
        '//head/title')[0].text_content().strip().replace(' - JavBus', '')
    if JAVID.replace('-', '').replace('_', '').replace(' ', '').isdigit():
        javTitle = javStudio + ' ' + javTitle
    metadata.title = javTitle

    # Tagline — "Label: X, Series: Y" from whichever links exist.
    data = {}
    label = detailsPageElements.xpath('//p/a[contains(@href, "/label/")]')
    if label:
        data['Label'] = label[0].text_content().strip()
    series = detailsPageElements.xpath('//p/a[contains(@href, "/series/")]')
    if series:
        data['Series'] = series[0].text_content().strip()
    metadata.tagline = ', '.join(
        ['%s: %s' % (key, value) for key, value in data.items()])

    # Release Date
    date = detailsPageElements.xpath('//div[@class="col-md-3 info"]/p[2]'
                                     )[0].text_content().strip().replace(
                                         'Release Date: ', '')
    date_object = datetime.strptime(date, '%Y-%m-%d')
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    for genreLink in detailsPageElements.xpath(
            '//span[@class="genre"]/a[contains(@href, "/genre/")]'):
        genreName = genreLink.text_content().lower().strip()
        movieGenres.addGenre(genreName)
    metadata.collections.add('Japan Adult Video')

    # Actors — photo is matched back to the performer by the img title attr.
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//a[@class="avatar-box"]'):
        fullActorName = actorLink.text_content().strip()
        actorPhotoURL = detailsPageElements.xpath(
            '//a[@class="avatar-box"]/div[@class="photo-frame"]/img[contains(@title, "%s")]/@src'
            % (fullActorName))[0]
        if not actorPhotoURL.startswith('http'):
            actorPhotoURL = PAsearchSites.getSearchBaseURL(
                siteNum) + actorPhotoURL
        # The placeholder image means no real headshot exists.
        if actorPhotoURL.rsplit('/', 1)[1] == 'nowprinting.gif':
            actorPhotoURL = ''
        movieActors.addActor(fullActorName, actorPhotoURL)

    # Posters — full cover plus sample thumbnails.
    xpaths = [
        '//a[contains(@href, "/cover/")]/@href',
        '//a[@class="sample-box"]/div/img/@src',
    ]
    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            if not poster.startswith('http'):
                poster = PAsearchSites.getSearchBaseURL(siteNum) + poster
            art.append(poster)

    # Derive the small "thumb" variant of the cover from its URL parts.
    coverImage = detailsPageElements.xpath(
        '//a[contains(@href, "/cover/")]/@href')
    coverImageCode = coverImage[0].rsplit('/', 1)[1].split('.')[0].split('_')[0]
    imageHost = coverImage[0].rsplit('/', 2)[0]
    coverImage = imageHost + '/thumb/' + coverImageCode + '.jpg'
    # The images.* CDN hosts use 'thumbs' (plural) in its path.
    if coverImage.count('/images.') == 1:
        coverImage = coverImage.replace('thumb', 'thumbs')
    if not coverImage.startswith('http'):
        coverImage = PAsearchSites.getSearchBaseURL(siteNum) + coverImage
    art.append(coverImage)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size

                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                # idx > 1 keeps the primary cover out of the art gallery.
                if width > 100 and idx > 1:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass
    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Populate Plex metadata for a Digital Playground page.

    metadata.id encodes the details URL ('_' -> '/', '!' -> '?').  Three page
    shapes are handled: a series episode (/series/), a blockbuster movie or
    single scene (/movies/), and everything else (Flixxx / Raw Cuts / etc.).
    Mutates metadata/movieGenres/movieActors and returns metadata.
    """
    detailsPageURL = str(metadata.id).split("|")[0].replace('_', '/').replace(
        "!", "?")
    detailsPageElements = HTML.ElementFromURL(detailsPageURL)
    # href of the current page's trailer link; used below to scope the
    # actor lookup to this episode.
    thisPage = detailsPageElements.xpath(
        '//a[contains(text(),"trailer")]')[0].get('href')
    metadata.collections.clear()
    metadata.studio = "Digital Playground"
    art = []

    # Title
    title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    # Determine what we're looking for and gather the information as needed.
    # NOTE(review): `summary`, `tagline` and `actors` are only bound on some
    # paths below; a path that skips them would raise NameError later —
    # presumably the site always provides them, but confirm.
    if "/series/" in detailsPageURL:
        # This is an episode in a Series
        seriesInfoPageElements = HTML.ElementFromURL(
            PAsearchSites.getSearchBaseURL(siteID) + detailsPageElements.xpath(
                '//a[contains(text(),"info")]')[0].get("href"))
        seriesTrailerPageElements = HTML.ElementFromURL(
            PAsearchSites.getSearchBaseURL(siteID) +
            seriesInfoPageElements.xpath('//a[@class="watch-now"]')[0].get(
                "href"))
        art.append(
            seriesTrailerPageElements.xpath('//div[@class="trailer-player "]')
            [0].get('data-poster-image'))
        tagline = "Series: " + seriesInfoPageElements.xpath(
            '//h1')[0].text_content().strip()
        summary = seriesInfoPageElements.xpath(
            '//div[@class="overview"]//p')[0].text_content().strip()
        genres = detailsPageElements.xpath(
            '//ul[@id="movie-info-format" and last()]/li/div/a')
        try:
            # Series needs to define the Episode Number and pull only actors
            # from that episode
            actors = detailsPageElements.xpath(
                '//a[@href="' + thisPage +
                '" and last()]//following-sibling::div[@class="model-names-wrapper"]/span[@class="model-names"]/a'
            )
            if len(actors) == 0:
                raise
        except:
            # I could put a backup plan here to pull actors from the Series
            # Info page...
            pass
    elif "/movies/" in detailsPageURL:
        movieInfoPageElements = HTML.ElementFromURL(
            PAsearchSites.getSearchBaseURL(siteID) + detailsPageElements.xpath(
                '//a[contains(text(),"info")]')[0].get("href"))
        tagline = "Blockbuster"
        summary = movieInfoPageElements.xpath(
            '//div[@class="overview"]//p')[0].text_content().strip()
        genres = movieInfoPageElements.xpath('//div[@class="box-tag"]/a')
        if "sceneid" in detailsPageURL:
            # This is an individual scene from a Blockbuster
            metadata.collections.add(title)
            k = detailsPageURL.rfind("=")
            sceneID = detailsPageURL[k + 1:].strip()
            # Scene still image: swap the size suffix for the 1290x726 render.
            sceneImg = movieInfoPageElements.xpath('//img[@alt="' + title +
                                                   ' - Scene ' + sceneID +
                                                   '"]')[0].get('data-srcset')
            k = sceneImg.rfind("/")
            art.append("https:" + sceneImg[:k + 1] + "1290x726_1.jpg")
            title = title + ": Scene " + sceneID
            try:
                # Pull the actors for just that one scene
                actors = movieInfoPageElements.xpath(
                    '//h4[text()="Scene ' + sceneID +
                    ': "]//following-sibling::a')
                if len(actors) == 0:
                    raise
            except:
                pass
        else:
            # This is a full Blockbuster movie
            try:
                actors = movieInfoPageElements.xpath(
                    '//div[@class="box-card model "]/div[@class="title-bar"]/div[@class="title-text"]/div/h4/a'
                )
                if len(actors) == 0:
                    raise
            except:
                pass
            sceneImgs = movieInfoPageElements.xpath(
                '//div[@class="box-card scene"]/div[@class="preview-image"]/a/img'
            )
            for sceneImg in sceneImgs:
                imgSrc = sceneImg.get('data-srcset')
                k = imgSrc.rfind("/")
                art.append("https:" + imgSrc[:k + 1] + "1290x726_1.jpg")
            art.append("http:" + movieInfoPageElements.xpath(
                '//img[@id="front-cover-hd"]')[0].get('src'))
            art.append("http:" + movieInfoPageElements.xpath(
                '//img[@id="back-cover-hd"]')[0].get('src'))
    else:
        # This must be a Flixxx or Raw Cuts or something else
        tagline = detailsPageElements.xpath(
            '//a[contains(@class,"full-scene-button")]')[0].text_content(
            ).strip()
        genres = detailsPageElements.xpath(
            '//ul[@id="movie-info-format" and last()]/li/div/a')
        try:
            # Sometimes it just doesn't have a synopsis...
            summary = detailsPageElements.xpath(
                '//span[text()="SYNOPSIS"]//following::span')[0].text_content(
                ).strip()
        except:
            pass
        try:
            actors = detailsPageElements.xpath(
                '//span[@class="subtitle" and text()="STARRING"]//following::span[1]//a'
            )
            if len(actors) == 0:
                raise
        except:
            # Fall back to finding the scene on the search page by title.
            Log("Fallback plan for Actors reached")
            searchPageElements = HTML.ElementFromURL(
                PAsearchSites.getSearchSearchURL(siteID) +
                urllib.quote(title))
            actors = searchPageElements.xpath('//h4[contains(text(),"' +
                                              title +
                                              '")]//following-sibling::a')
        art.append(
            detailsPageElements.xpath('//div[@class="trailer-player "]')
            [0].get('data-poster-image'))
    # NOTE(review): this repeats the append above for the 'else' path — the
    # duplicate is harmless only because posterAlreadyExists() dedupes below;
    # confirm whether it was meant to run for all three branches.
    art.append(
        detailsPageElements.xpath('//div[@class="trailer-player "]')[0].get(
            'data-poster-image'))

    tagline = "DP " + tagline
    metadata.collections.add(tagline)
    metadata.tagline = tagline
    metadata.title = title
    metadata.summary = summary

    # Genres
    movieGenres.clearGenres()
    Log("Genres found: " + str(len(genres)))
    if len(genres) > 0:
        for genre in genres:
            genreName = str(genre.text_content().lower().strip())
            movieGenres.addGenre(genreName)

    # Date: primary layout first, alternate layout as fallback.
    try:
        releaseDate = detailsPageElements.xpath(
            '//ul[contains(@class,"movie-details")]//span')[0].text_content()
    except:
        releaseDate = detailsPageElements.xpath(
            './/div[@class="release-info"]/div[@class="info-left"]/span[2]'
        )[0].text_content().strip()
    if len(releaseDate) > 0:
        date_object = datetime.strptime(releaseDate, '%m-%d-%Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    Log("Actors found: " + str(len(actors)))
    if len(actors) > 0:
        for actorLink in actors:
            actorPageURL = actorLink.get("href")
            if "/model/" in actorPageURL:  # dirty hack to filter out the extra actor I was getting that was named for some other scene; actual problem is probably just my xpath search for actors above
                actorName = str(actorLink.text_content().strip())
                actorPage = HTML.ElementFromURL(
                    PAsearchSites.getSearchBaseURL(siteID) + actorPageURL)
                actorPhotoURL = "https:" + actorPage.xpath(
                    '//div[@class="preview-image"]//img')[0].get("src")
                movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    j = 1
    Log("Artwork found: " + str(len(art)))
    for posterUrl in art:
        # NOTE(review): bare posterAlreadyExists — sibling scrapers call
        # PAsearchSites.posterAlreadyExists; assumes a module-level helper
        # exists elsewhere in this file — confirm.
        if not posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                img_file = urllib.urlopen(posterUrl)
                im = StringIO(img_file.read())
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if (width > 1):
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Preview(HTTP.Request(
                        posterUrl, headers={
                            'Referer': 'http://www.google.com'
                        }).content,
                                                                sort_order=j)
                if (width > 100):
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Preview(HTTP.Request(
                        posterUrl, headers={
                            'Referer': 'http://www.google.com'
                        }).content,
                                                            sort_order=j)
                j = j + 1
            except:
                pass

    return metadata
def search(results, lang, siteNum, searchData):
    """Search Femdom Empire for a scene.

    Runs the site's advanced search, then applies hand-curated matches for
    known-difficult titles; only when neither produced anything does it fall
    back to the plain tour search.  Appends MetadataSearchResult entries to
    `results` and returns it.
    """
    # Advanced Search
    response = PAutils.HTTPRequest(
        PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded)
    page = HTML.ElementFromString(response.text)

    for node in page.xpath('//div[contains(@class, "item-info")]'):
        foundTitle = node.xpath('.//a')[0].text_content().strip()
        encodedID = PAutils.Encode(node.xpath('.//a/@href')[0])
        foundDate = parse(
            node.xpath('.//span[@class="date"]')[0].text_content().strip()
        ).strftime('%Y-%m-%d')

        # Score by date when the caller supplied one, otherwise by title.
        if searchData.date:
            distance = Util.LevenshteinDistance(searchData.date, foundDate)
        else:
            distance = Util.LevenshteinDistance(searchData.title.lower(),
                                                foundTitle.lower())

        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (encodedID, siteNum),
                name='%s [Femdom Empire] %s' % (foundTitle, foundDate),
                score=100 - distance,
                lang=lang))

    # Difficult Scenes: hand-curated overrides keyed by exact title.
    if searchData.title in manualMatch:
        entry = manualMatch[searchData.title]
        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (PAutils.Encode(entry['curID']), siteNum),
                name=entry['name'],
                score=101,
                lang=lang))

    # Anything found so far wins; otherwise fall through to the basic search.
    if results:
        return results

    # Standard Search
    response = PAutils.HTTPRequest(
        PAsearchSites.getSearchBaseURL(siteNum) +
        '/tour/search.php?query=' + searchData.encoded)
    page = HTML.ElementFromString(response.text)

    for node in page.xpath('//div[contains(@class, "item-info")]'):
        foundTitle = node.xpath('.//a')[0].text_content().strip()
        encodedID = PAutils.Encode(node.xpath('.//a/@href')[0])
        foundDate = parse(
            node.xpath('.//span[@class="date"]')[0].text_content().strip()
        ).strftime('%Y-%m-%d')

        if searchData.date:
            distance = Util.LevenshteinDistance(searchData.date, foundDate)
        else:
            distance = Util.LevenshteinDistance(searchData.title.lower(),
                                                foundTitle.lower())

        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (encodedID, siteNum),
                name='%s [Femdom Empire] %s' % (foundTitle, foundDate),
                score=100 - distance,
                lang=lang))

    return results
def search(results, lang, siteNum, searchData):
    """Search Adult Empire for a movie (and its split scenes).

    If the title starts with a number > 100, that number is treated as a
    direct movie ID and the on-site search is skipped.  Results from the
    on-site search and a Google fallback are appended to `results`; entries
    that tie at the maximum fuzzy score (80) are collected in `temp` and
    demoted to 79 when more than one tie exists.  Returns `results`.
    """
    searchResults = []   # URLs to fetch directly (direct ID + Google hits)
    siteResults = []     # URLs already covered by the on-site search
    temp = []            # score-80 ties, resolved at the end
    directID = False
    count = 0            # number of score-80 ties
    sceneID = None

    # Leading number > 100 in the query is treated as a direct movie ID.
    parts = searchData.title.split()
    if unicode(parts[0], 'UTF-8').isdigit():
        sceneID = parts[0]
        if int(sceneID) > 100:
            searchData.title = searchData.title.replace(sceneID, '',
                                                        1).strip()
            movieURL = '%s/%s' % (PAsearchSites.getSearchBaseURL(siteNum),
                                  sceneID)
            searchResults.append(movieURL)
            directID = True

    searchData.encoded = searchData.title.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum),
                          searchData.encoded)
    req = PAutils.HTTPRequest(
        searchURL, headers={'Referer': 'http://www.data18.empirestores.co'})
    searchPageElements = HTML.ElementFromString(req.text)

    if not directID:
        for searchResult in searchPageElements.xpath(
                '//div[@class="product-card"]'):
            movieURL = '%s%s' % (
                PAsearchSites.getSearchBaseURL(siteNum), searchResult.xpath(
                    './div[@class="boxcover-container"]/a/@href')[0])
            urlID = searchResult.xpath(
                './div[@class="boxcover-container"]/a/@href')[0].split("/")[1]
            if movieURL not in searchResults:
                titleNoFormatting = PAutils.parseTitle(
                    searchResult.xpath(
                        './div[@class="product-details"]/div/a/text()')
                    [0].strip(), siteNum)
                curID = PAutils.Encode(movieURL)
                siteResults.append(movieURL)

                # Provisional score to decide whether the details page is
                # worth fetching.
                # NOTE(review): `displayDate`/`releaseDate` are not assigned
                # until the details page is fetched below — on the first
                # iteration with searchData.date set this looks like a
                # NameError; confirm and initialise both to '' before the
                # loop if so.
                if sceneID == urlID:
                    score = 100
                elif searchData.date and displayDate:
                    score = 80 - Util.LevenshteinDistance(
                        searchData.date, releaseDate)
                else:
                    score = 80 - Util.LevenshteinDistance(
                        searchData.title.lower(), titleNoFormatting.lower())

                if score > 70:
                    sceneURL = PAutils.Decode(curID)
                    req = PAutils.HTTPRequest(sceneURL)
                    detailsPageElements = HTML.ElementFromString(req.text)

                    # Find date on movie specific page
                    date = detailsPageElements.xpath(
                        '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Released:")]/text()'
                    )[0].strip()
                    if date and not date == 'unknown':
                        try:
                            # NOTE(review): '%b %d, %Y' here but '%b %d %Y'
                            # (no comma) in the Google loop below — one of
                            # the two is presumably wrong for this site.
                            releaseDate = datetime.strptime(
                                date, '%b %d, %Y').strftime('%Y-%m-%d')
                        except:
                            releaseDate = ''
                    else:
                        releaseDate = searchData.dateFormat(
                        ) if searchData.date else ''
                    displayDate = releaseDate if date else ''

                    # Studio
                    try:
                        studio = detailsPageElements.xpath(
                            '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Studio:")]/a/text()'
                        )[0].strip()
                    except:
                        studio = ''

                    # Re-score now that the real release date is known.
                    if sceneID == urlID:
                        score = 100
                    elif searchData.date and displayDate:
                        score = 80 - Util.LevenshteinDistance(
                            searchData.date, releaseDate)
                    else:
                        score = 80 - Util.LevenshteinDistance(
                            searchData.title.lower(),
                            titleNoFormatting.lower())

                    # Exact-80 ties go to `temp` for possible demotion.
                    if score == 80:
                        count += 1
                        temp.append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s [%s] %s' %
                                (titleNoFormatting, studio, displayDate),
                                score=score,
                                lang=lang))
                    else:
                        results.Append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s [%s] %s' %
                                (titleNoFormatting, studio, displayDate),
                                score=score,
                                lang=lang))

                    # Split Scenes: one extra result per scene on the page.
                    scenes = detailsPageElements.xpath(
                        '//div[@class="product-details-container"]/div[@class="container"]/div[@class="row"]'
                    )
                    # Python 2 integer division (rows come in pairs).
                    sceneCount = (len(scenes) - 1) / 2
                    for sceneNum in range(0, sceneCount):
                        section = 'Scene %d' % (sceneNum + 1)
                        actorNames = ', '.join(
                            detailsPageElements.xpath(
                                '//div[@class="container"]/div[@class="row"][./div[@class="col-sm-6 text-right text-left-xs m-b-1"]][%d]/div[2]/div/a/text()'
                                % (sceneNum + 1)))
                        if score == 80:
                            count += 1
                            temp.append(
                                MetadataSearchResult(
                                    id='%s|%d|%s|%d' %
                                    (curID, siteNum, releaseDate,
                                     sceneNum + 1),
                                    name='%s/#%d[%s][%s] %s' %
                                    (titleNoFormatting, sceneNum + 1,
                                     actorNames, studio, displayDate),
                                    score=score,
                                    lang=lang))
                        else:
                            results.Append(
                                MetadataSearchResult(
                                    id='%s|%d|%s|%d' %
                                    (curID, siteNum, releaseDate,
                                     sceneNum + 1),
                                    name='%s/#%d[%s][%s] %s' %
                                    (titleNoFormatting, sceneNum + 1,
                                     actorNames, studio, displayDate),
                                    score=score,
                                    lang=lang))
                else:
                    # Low-score hit: append without fetching the details page.
                    # NOTE(review): also references releaseDate/displayDate,
                    # which are unset until a >70 hit has been processed.
                    if score == 80:
                        count += 1
                        temp.append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s %s' %
                                (titleNoFormatting, displayDate),
                                score=score,
                                lang=lang))
                    else:
                        results.Append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s %s' %
                                (titleNoFormatting, displayDate),
                                score=score,
                                lang=lang))

    # Google fallback: movie pages not already seen on-site.
    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for movieURL in googleResults:
        if ('/movies/' in movieURL and '.html' not in movieURL
                and movieURL not in searchResults
                and movieURL not in siteResults):
            searchResults.append(movieURL)

    for movieURL in searchResults:
        req = PAutils.HTTPRequest(movieURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', movieURL)
        titleNoFormatting = PAutils.parseTitle(
            detailsPageElements.xpath('//h1/text()')[0].strip(), siteNum)
        curID = PAutils.Encode(movieURL)

        date = detailsPageElements.xpath(
            '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Released:")]/text()'
        )[0].strip()
        if date and not date == 'unknown':
            try:
                # NOTE(review): no comma in the format here, unlike above.
                releaseDate = datetime.strptime(
                    date, '%b %d %Y').strftime('%Y-%m-%d')
            except:
                releaseDate = ''
        else:
            releaseDate = searchData.dateFormat() if searchData.date else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif searchData.date and displayDate:
            score = 80 - Util.LevenshteinDistance(searchData.date,
                                                  releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchData.title.lower(),
                                                  titleNoFormatting.lower())

        # Studio
        try:
            studio = detailsPageElements.xpath(
                '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Studio:")]/a/text()'
            )[0].strip()
        except:
            studio = ''

        if score == 80:
            count += 1
            temp.append(
                MetadataSearchResult(id='%s|%d|%s' %
                                     (curID, siteNum, releaseDate),
                                     name='%s [%s] %s' %
                                     (titleNoFormatting, studio, displayDate),
                                     score=score,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id='%s|%d|%s' %
                                     (curID, siteNum, releaseDate),
                                     name='%s [%s] %s' %
                                     (titleNoFormatting, studio, displayDate),
                                     score=score,
                                     lang=lang))

        # Split Scenes
        scenes = detailsPageElements.xpath(
            '//div[@class="product-details-container"]/div[@class="container"]/div[@class="row"]'
        )
        sceneCount = (len(scenes) - 1) / 2
        for sceneNum in range(0, sceneCount):
            actorNames = ', '.join(
                detailsPageElements.xpath(
                    '//div[@class="container"]/div[@class="row"][./div[@class="col-sm-6 text-right text-left-xs m-b-1"]][%d]/div[2]/div/a/text()'
                    % (sceneNum + 1)))
            if score == 80:
                count += 1
                temp.append(
                    MetadataSearchResult(
                        id='%s|%d|%s|%d' %
                        (curID, siteNum, releaseDate, sceneNum + 1),
                        name='%s/#%d[%s][%s] %s' %
                        (titleNoFormatting, sceneNum + 1, actorNames, studio,
                         displayDate),
                        score=score,
                        lang=lang))
            else:
                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s|%d' %
                        (curID, siteNum, releaseDate, sceneNum + 1),
                        name='%s/#%d[%s][%s] %s' %
                        (titleNoFormatting, sceneNum + 1, actorNames, studio,
                         displayDate),
                        score=score,
                        lang=lang))

    # Demote ties: if more than one result scored exactly 80, none of them is
    # a confident match, so re-append them all at 79.
    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=79,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=result.score,
                                     lang=lang))

    return results
def update(metadata, siteID, movieGenres, movieActors):
    """Populate Plex metadata for a Mylf-network scene.

    metadata.id is 'sceneName|siteID|releaseDate|contentName'; the scene's
    JSON blob is looked up via getJSONfromPage().  Mutates `metadata`,
    `movieGenres` and `movieActors` in place and returns `metadata`.
    """
    metadata_id = str(metadata.id).split('|')
    sceneName = metadata_id[0]
    releaseDate = metadata_id[2]
    contentName = metadata_id[3]
    detailsPageElements = getJSONfromPage(
        PAsearchSites.getSearchSearchURL(siteID) +
        sceneName)[contentName][sceneName]

    # Studio
    metadata.studio = 'Mylf'

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    metadata.summary = detailsPageElements['description']

    # Tagline and Collection(s): prefer the sub-site name from the JSON,
    # fall back to the configured site name.
    metadata.collections.clear()
    if 'site' in detailsPageElements:
        subSite = detailsPageElements['site']['name']
    else:
        subSite = PAsearchSites.getSearchSiteName(siteID)
    metadata.tagline = subSite
    metadata.collections.add(subSite)

    # Release Date (may be empty in the ID; skipped if so)
    if releaseDate:
        date_object = parse(releaseDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors: each model's photo comes from a secondary JSON lookup.
    movieActors.clearActors()
    actors = detailsPageElements['models']
    for actorLink in actors:
        actorID = actorLink['modelId']
        actorName = actorLink['modelName']
        actorPhotoURL = ''
        actorData = getJSONfromPage(
            '%s/models/%s' % (PAsearchSites.getSearchBaseURL(siteID),
                              actorID))
        if actorData:
            actorPhotoURL = actorData['modelsContent'][actorID]['img']
        movieActors.addActor(actorName, actorPhotoURL)

    # Genres: base genres plus per-sub-site additions.
    movieGenres.clearGenres()
    genres = ["MILF", "Mature"]
    if subSite.lower() == "MylfBoss".lower():
        for genreName in ['Office', 'Boss']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "MylfBlows".lower():
        for genreName in ['B*****b']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Milfty".lower():
        for genreName in ['Cheating']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Mom Drips".lower():
        for genreName in ['Creampie']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Milf Body".lower():
        for genreName in ['Gym', 'Fitness']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Lone Milf".lower():
        for genreName in ['Solo']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Full Of JOI".lower():
        for genreName in ['JOI']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Mylfed".lower():
        for genreName in ['Lesbian', 'Girl on Girl', 'GG']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "MylfDom".lower():
        for genreName in ['BDSM']:
            movieGenres.addGenre(genreName)

    # NOTE(review): case-sensitive, unlike the .lower() comparisons above —
    # kept as-is to preserve behavior; confirm whether it should match.
    if (len(actors) > 1) and subSite != "Mylfed":
        genres.append("Threesome")

    for genre in genres:
        movieGenres.addGenre(genre)

    # Posters
    art = [detailsPageElements['img']]

    Log('Artwork found: %d' % len(art))
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                # BUG FIX: the original called urllib.Request(), which does
                # not exist in Python 2's urllib module (it is
                # urllib2.Request), so every download raised AttributeError
                # and the bare except silently dropped all artwork.  Fetch
                # through the framework HTTP client instead, and reuse the
                # downloaded bytes for both the size check and the proxies
                # (the original downloaded each image twice).
                imageContent = HTTP.Request(posterUrl,
                                            headers=headers).content
                im = StringIO(imageContent)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(imageContent,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item (landscape only)
                    metadata.art[posterUrl] = Proxy.Media(imageContent,
                                                          sort_order=idx)
            except:
                # Best-effort: skip images that fail to download or parse.
                pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate Plex metadata for a HighTechVR scene page.

    The xpath unions cover the several page layouts this network uses.
    Mutates `metadata`, `movieGenres` and `movieActors` in place and
    returns `metadata`.
    """
    sceneURL = PAsearchSites.getSearchSearchURL(siteNum) + str(
        metadata.id).split('|')[0]
    page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)

    # Title
    metadata.title = page.xpath('//h1')[0].text_content().strip()

    # Summary
    metadata.summary = page.xpath(
        '//div[@class="video-group-bottom"]/p | //p[@class="u-lh--opt"] | //div[@class="video-info"]/p | //div[@class="desc"]'
    )[0].text_content().strip()

    # Studio
    metadata.studio = 'HighTechVR'

    # Tagline and Collection: the sub-site name sits after the '|' in <title>.
    metadata.collections.clear()
    tagline = page.xpath(
        '//title')[0].text_content().split('|')[1].strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    dateText = page.xpath(
        '//span[@class="date-display-single"] | //span[@class="u-inline-block u-mr--nine"] | //div[@class="video-meta-date"] | //div[@class="date"]'
    )[0].text_content().strip()
    metadata.originally_available_at = parse(dateText)
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for tagNode in page.xpath(
            '//div[contains(@class, "video-tags")]//a | //div[@class="tags"]//a'
    ):
        movieGenres.addGenre(tagNode.text_content().strip())

    # Actors: each performer's photo is scraped from their model page.
    movieActors.clearActors()
    for performerNode in page.xpath(
            '//div[@class="video-actress-name"]//a | //div[@class="u-mt--three u-mb--three"]//a | //div[@class="model-one-inner js-trigger-lazy-item"]//a | //div[@class="featuring commed"]//a'
    ):
        performerName = performerNode.text_content().strip()
        modelPageURL = PAsearchSites.getSearchBaseURL(
            siteNum) + performerNode.get('href')
        modelPage = HTML.ElementFromString(
            PAutils.HTTPRequest(modelPageURL).text)
        photoURL = modelPage.xpath(
            '//div[contains(@class, "model-img-wrapper")]/figure/a/img | //div[contains(@class, "u-ratio--model-poster")]//img | //div[contains(@class, "model-one-inner")]//img | //div[contains(@class, "row actor-info")]//img'
        )[0].get('src').split('?')[0]
        movieActors.addActor(performerName, photoURL)

    # Posters: gallery links (absolute URLs only), plus the splash-screen
    # background extracted from its inline CSS url(...).
    art = []
    for galleryNode in page.xpath(
            '//div[contains(@class,"video-gallery")]//div//figure//a | //a[@class="u-block u-ratio u-ratio--lightbox u-bgc--back-opt u-z--zero"] | //div[@class="scene-previews-container"]//a'
    ):
        galleryImg = galleryNode.get('href').split('?')[0]
        if galleryImg.startswith('http'):
            art.append(galleryImg)

    splashStyle = page.xpath(
        '//div[@class="splash-screen fullscreen-message is-visible"]/@style'
    )[0]
    art.append(splashStyle.split('url(')[1].split(')')[0])

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download the image so its dimensions can be inspected.
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Wide-enough images double as background art; everything
            # non-degenerate is offered as a poster.
            if width > 1:
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100:
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            # Best-effort: skip images that fail to download or parse.
            pass

    return metadata