def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate movie metadata from a Data18 movie page.

    metadata.id is '<encodedURL>|<siteNum>|<date>[|...]'; more than three
    parts means the item is a scene handled by the Data18Content module.
    """
    metadata_id = str(metadata.id).split('|')

    # Scene-level IDs are delegated to Data18Content. Do this before any
    # network work (the original fetched the page first and threw it away).
    if len(metadata_id) > 3:
        Log('Switching to Data18Content')
        siteData18Content.update(metadata, lang, siteNum, movieGenres, movieActors)
        return metadata

    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content(), siteNum)

    # Summary: text after the "Description:" label
    summary = detailsPageElements.xpath('//div[@class="gen12"]/p[contains(., "Description")]')[0].text_content().split(':', 1)[1].strip()
    if len(summary) > 1:
        metadata.summary = summary

    # Studio: the label varies — try Network, then Studio, then Site
    try:
        studio = detailsPageElements.xpath('//i[contains(., "Network")]//preceding-sibling::a[1]')[0].text_content().strip()
    except:
        try:
            studio = detailsPageElements.xpath('//i[contains(., "Studio")]//preceding-sibling::a[1]')[0].text_content().strip()
        except:
            try:
                studio = detailsPageElements.xpath('//i[contains(., "Site")]//preceding-sibling::a[1]')[0].text_content().strip()
            except:
                studio = ''
    if studio:
        metadata.studio = studio

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)
    try:
        tagline = detailsPageElements.xpath('//p[contains(., "Serie")]//a[@title]')[0].text_content().strip()
        metadata.collections.add(tagline)
    except:
        pass

    # Release Date: prefer the date carried in the ID, else scrape the page
    if sceneDate:
        date_object = parse(sceneDate)
    else:
        date_object = parse(detailsPageElements.xpath('//div[@class="gen12"]/p[contains(., "Release")]')[0].text_content().split(':')[2].strip())
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//p[./b[contains(., "Categories")]]//a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors (this layout offers no performer photos)
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//div[./p[span[@class="gen11"]]]//a')
    for actorLink in actors:
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''
        if actorName:
            movieActors.addActor(actorName, actorPhotoURL)

    # Director: text after "Director:"; 'Unknown' is treated as absent
    metadata.directors.clear()
    director = metadata.directors.new()
    try:
        directorName = detailsPageElements.xpath('//p[./b[contains(., "Director")]]')[0].text_content().split(':')[2].strip()
        if not directorName == 'Unknown':
            director.name = directorName
    except:
        pass

    # Posters: lightbox links plus th5 thumbnails upgraded to full size
    art = []
    xpaths = [
        '//a[@data-featherlight="image"]/@href',
        '//img[contains(@src, "th5")]/@src',
    ]
    try:
        for xpath in xpaths:
            for img in detailsPageElements.xpath(xpath):
                art.append(img.replace('/th5', ''))
    except:
        pass

    images = []
    posterExists = False
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.data18.com'})
                # Keep the URL with its payload: the original fallback loop
                # indexed `art[idx - 1]` with the position in the *filtered*
                # `images` list, mis-associating URLs whenever a poster was
                # skipped as already existing.
                images.append((posterUrl, image))
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > width:
                    # Item is a poster
                    posterExists = True
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    # Fallback: no portrait image was found, accept any downloaded image as poster
    if not posterExists:
        for idx, (posterUrl, image) in enumerate(images, 1):
            try:
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def search(results, lang, siteNum, searchData):
    """Search Data18 for matching movies.

    Candidates come from three sources: a direct movie ID typed at the front
    of the query, the site's own search page, and a Google site search.
    Results that tie on the date-only score of 80 are collected in `temp`
    and demoted to 79 at the end so none of them auto-matches.
    """
    def getStudio(detailsPageElements):
        # Studio label varies between pages: Network, then Studio, then Site.
        # (The original duplicated this try-chain twice, plus an unused
        # `siteName` copy of it.)
        for label in ('Network', 'Studio', 'Site'):
            try:
                return detailsPageElements.xpath('//i[contains(., "%s")]//preceding-sibling::a[1]' % label)[0].text_content().strip()
            except:
                pass
        return ''

    searchResults = []
    siteResults = []
    temp = []
    count = 0

    # A leading number greater than 100 is treated as a direct Data18 movie ID
    sceneID = None
    parts = searchData.title.split()
    if unicode(parts[0], 'UTF-8').isdigit():
        sceneID = parts[0]
        if int(sceneID) > 100:
            searchData.title = searchData.title.replace(sceneID, '', 1).strip()
            movieURL = '%s/movies/%s' % (PAsearchSites.getSearchBaseURL(siteNum), sceneID)
            searchResults.append(movieURL)

    searchData.encoded = searchData.title.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum), searchData.encoded)
    req = PAutils.HTTPRequest(searchURL, headers={'Referer': 'http://www.data18.com'})
    searchPageElements = HTML.ElementFromString(req.text)

    for searchResult in searchPageElements.xpath('//a[contains(@href, "movies")]//parent::div[contains(@style, "float: left; padding")]'):
        movieURL = searchResult.xpath('.//*[img]/@href')[0]
        urlID = re.sub(r'.*/', '', movieURL)
        if movieURL not in searchResults:
            titleNoFormatting = PAutils.parseTitle(searchResult.xpath('.//*[contains(@href, "movies")]')[1].text_content(), siteNum)
            curID = PAutils.Encode(movieURL)
            siteResults.append(movieURL)

            # Release date shown as YYYYMMDD in the result tile's text
            date = searchResult.text
            if date and not date == 'unknown':
                try:
                    releaseDate = datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')
                except:
                    releaseDate = ''
            else:
                releaseDate = searchData.dateFormat() if searchData.date else ''
            displayDate = releaseDate if date else ''

            if sceneID == urlID:
                score = 100
            elif searchData.date and displayDate:
                score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
            else:
                score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

            if score > 70:
                # Promising match: fetch the movie page for studio + scene list
                sceneURL = PAutils.Decode(curID)
                req = PAutils.HTTPRequest(sceneURL)
                detailsPageElements = HTML.ElementFromString(req.text)

                studio = getStudio(detailsPageElements)

                if score == 80:
                    count += 1
                    temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
                else:
                    results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))

                # Split Scenes: the digit in the "... Related Scenes" text node
                sceneCount = detailsPageElements.xpath('//text()[contains(., "Related Scenes")]')[0][-2]
                if sceneCount.isdigit():
                    sceneCount = int(sceneCount)
                else:
                    sceneCount = 0
                for sceneNum in range(1, sceneCount + 1):
                    section = "Scene " + str(sceneNum)
                    scene = PAutils.Encode(detailsPageElements.xpath('//a[contains(., "%s")]/@href' % (section))[0])
                    if score == 80:
                        count += 1
                        temp.append(MetadataSearchResult(id='%s|%d|%s|%s|%d' % (scene, siteNum, releaseDate, titleNoFormatting, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))
                    else:
                        results.Append(MetadataSearchResult(id='%s|%d|%s|%s|%d' % (scene, siteNum, releaseDate, titleNoFormatting, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))
            else:
                # Low-confidence match: no detail fetch, no studio in the name
                if score == 80:
                    count += 1
                    temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s %s' % (titleNoFormatting, displayDate), score=score, lang=lang))
                else:
                    results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s %s' % (titleNoFormatting, displayDate), score=score, lang=lang))

    # Add Google hits not already seen via the site search
    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for movieURL in googleResults:
        if ('/movies/' in movieURL and '.html' not in movieURL and movieURL not in searchResults and movieURL not in siteResults):
            searchResults.append(movieURL)

    for movieURL in searchResults:
        req = PAutils.HTTPRequest(movieURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', movieURL)
        titleNoFormatting = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content(), siteNum)
        curID = PAutils.Encode(movieURL)

        try:
            # FIX: the original chained .text_content() twice here, which
            # always raised AttributeError and silently blanked the date.
            date = detailsPageElements.xpath('//p[contains(., "Release")]')[0].text_content().split(':')[2].strip()
        except:
            date = ''
        if date and not date == 'unknown':
            releaseDate = parse(date).strftime('%Y-%m-%d')
        else:
            releaseDate = searchData.dateFormat() if searchData.date else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif searchData.date and displayDate:
            score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

        studio = getStudio(detailsPageElements)

        if score == 80:
            count += 1
            temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
        else:
            results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))

        # Split Scenes (same pattern as the first loop)
        sceneCount = detailsPageElements.xpath('//text()[contains(., "Related Scenes")]')[0][-2]
        if sceneCount.isdigit():
            sceneCount = int(sceneCount)
        else:
            sceneCount = 0
        for sceneNum in range(1, sceneCount + 1):
            section = "Scene " + str(sceneNum)
            scene = PAutils.Encode(detailsPageElements.xpath('//a[contains(., "%s")]/@href' % (section))[0])
            if score == 80:
                count += 1
                temp.append(MetadataSearchResult(id='%s|%d|%s|%s|%d' % (scene, siteNum, releaseDate, titleNoFormatting, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))
            else:
                results.Append(MetadataSearchResult(id='%s|%d|%s|%s|%d' % (scene, siteNum, releaseDate, titleNoFormatting, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))

    # Demote tied 80-score results so none of them auto-matches
    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=79, lang=lang))
        else:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=result.score, lang=lang))

    return results
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate movie metadata from a Bang Bros scene page.

    metadata.id is '<encodedURL>|<siteNum>|<date>'.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    # Stored URLs may be site-relative
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title (normalize curly apostrophes to straight ones)
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content(), siteNum).replace('’', '\'')

    # Summary
    metadata.summary = detailsPageElements.xpath('//div[@class="vdoDesc"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'Bang Bros'

    # Tagline and Collection(s): sub-site name from the second /websites link
    metadata.collections.clear()
    tagline = detailsPageElements.xpath('//a[contains(@href, "/websites")]')[1].text_content().strip()
    metadata.tagline = tagline
    if Prefs['collections_addsitename']:
        metadata.collections.add(metadata.tagline)

    # Release Date: only the date carried in the ID is used
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[contains(@class, "vdoTags")]//a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors: one extra request per performer to fetch the profile photo
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//div[@class="vdoCast"]//a[contains(@href, "/model")]'):
        actorName = actorLink.text_content().strip()
        actorPageURL = actorLink.get('href')
        req = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + actorPageURL)
        actorPage = HTML.ElementFromString(req.text)
        # Profile image src is protocol-relative ('//...')
        actorPhotoURL = 'http:' + actorPage.xpath('//div[@class="profilePic_in"]//img/@src')[0]
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: player overlay and gallery widget; thumbs upgraded to 'big' variants
    art = []
    xpaths = [
        '//img[contains(@id, "player-overlay-image")]/@src',
        '//div[@class="WdgtPic modal-overlay"]//img/@src'
    ]
    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            if not poster.startswith('http'):
                poster = 'http:' + poster
            if 'big' not in poster:
                # Rewrite .../name.jpg to .../bigname.jpg
                (poster, filename) = poster.rsplit('/', 1)
                poster = poster + '/big' + filename
            art.append(poster)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def search(results, lang, siteNum, searchData):
    """Search the Data18 empirestores mirror for matching movies.

    Candidates come from a direct movie ID, the site search page, and a
    Google site search. Results that tie at the date-only score of 80 are
    collected in `temp` and demoted to 79 at the end.
    """
    searchResults = []
    siteResults = []
    temp = []
    directID = False
    count = 0

    # A leading number greater than 100 is treated as a direct movie ID
    sceneID = None
    parts = searchData.title.split()
    if unicode(parts[0], 'UTF-8').isdigit():
        sceneID = parts[0]
        if int(sceneID) > 100:
            searchData.title = searchData.title.replace(sceneID, '', 1).strip()
            movieURL = '%s/%s' % (PAsearchSites.getSearchBaseURL(siteNum), sceneID)
            searchResults.append(movieURL)
            directID = True

    searchData.encoded = searchData.title.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum), searchData.encoded)
    req = PAutils.HTTPRequest(searchURL, headers={'Referer': 'http://www.data18.empirestores.co'})
    searchPageElements = HTML.ElementFromString(req.text)

    if not directID:
        for searchResult in searchPageElements.xpath('//a[@class="boxcover"]'):
            movieURL = '%s%s' % (PAsearchSites.getSearchBaseURL(siteNum), searchResult.xpath('./@href')[0])
            urlID = searchResult.xpath('./@href')[0].split("/")[1]
            if movieURL not in searchResults:
                titleNoFormatting = PAutils.parseTitle(searchResult.xpath('./span/span/text()')[0].strip(), siteNum)
                curID = PAutils.Encode(movieURL)
                siteResults.append(movieURL)

                # Provisional score on ID/title only. FIX: the original
                # referenced releaseDate/displayDate here before they were
                # ever assigned — a NameError on the first candidate and
                # stale values from previous iterations afterwards. The
                # release date is not known until the detail page is
                # fetched below, where the score is recomputed with it.
                if sceneID == urlID:
                    score = 100
                else:
                    score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

                if score > 70:
                    sceneURL = PAutils.Decode(curID)
                    req = PAutils.HTTPRequest(sceneURL)
                    detailsPageElements = HTML.ElementFromString(req.text)

                    # Find date on movie specific page
                    date = detailsPageElements.xpath('//div[@class="release-date" and ./span[contains(., "Released:")]]/text()')[0].strip()
                    if date and not date == 'unknown':
                        try:
                            releaseDate = datetime.strptime(date, '%b %d, %Y').strftime('%Y-%m-%d')
                        except:
                            releaseDate = ''
                    else:
                        releaseDate = searchData.dateFormat() if searchData.date else ''
                    displayDate = releaseDate if date else ''

                    # Studio
                    try:
                        studio = detailsPageElements.xpath('//div[@class="studio"]/a/text()')[0].strip()
                    except:
                        studio = ''

                    # Re-score now that the release date is known
                    if sceneID == urlID:
                        score = 100
                    elif searchData.date and displayDate:
                        score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
                    else:
                        score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

                    if score == 80:
                        count += 1
                        temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
                    else:
                        results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))

                    # Split Scenes
                    scenes = detailsPageElements.xpath('//div[@class="item-grid item-grid-scene"]/div/a/@href')
                    sceneCount = len(scenes)
                    for sceneNum in range(0, sceneCount):
                        section = "Scene %d" % (sceneNum + 1)
                        if score == 80:
                            count += 1
                            temp.append(MetadataSearchResult(id='%s|%d|%s|%d' % (curID, siteNum, releaseDate, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))
                        else:
                            results.Append(MetadataSearchResult(id='%s|%d|%s|%d' % (curID, siteNum, releaseDate, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))
                else:
                    # Low-confidence match: no detail fetch, no date/studio shown
                    if score == 80:
                        count += 1
                        temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, ''), name='%s %s' % (titleNoFormatting, ''), score=score, lang=lang))
                    else:
                        results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, ''), name='%s %s' % (titleNoFormatting, ''), score=score, lang=lang))

    # Add Google hits not already seen via the site search
    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for movieURL in googleResults:
        if ('/movies/' in movieURL and '.html' not in movieURL and movieURL not in searchResults and movieURL not in siteResults):
            searchResults.append(movieURL)

    for movieURL in searchResults:
        req = PAutils.HTTPRequest(movieURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', movieURL)
        titleNoFormatting = PAutils.parseTitle(detailsPageElements.xpath('//h1[@class="description"]/text()')[0].strip(), siteNum)
        curID = PAutils.Encode(movieURL)

        date = detailsPageElements.xpath('//div[@class="release-date" and ./span[contains(., "Released:")]]/text()')[0].strip()
        if date and not date == 'unknown':
            try:
                releaseDate = datetime.strptime(date, '%b %d, %Y').strftime('%Y-%m-%d')
            except:
                releaseDate = ''
        else:
            releaseDate = searchData.dateFormat() if searchData.date else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif searchData.date and displayDate:
            score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

        # Studio
        try:
            studio = detailsPageElements.xpath('//div[@class="studio"]/a/text()')[0].strip()
        except:
            studio = ''

        if score == 80:
            count += 1
            temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
        else:
            results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))

        # Split Scenes
        scenes = detailsPageElements.xpath('//div[@class="item-grid item-grid-scene"]/div/a/@href')
        sceneCount = len(scenes)
        for sceneNum in range(1, sceneCount + 1):
            section = "Scene %d" % (sceneNum)
            if score == 80:
                count += 1
                temp.append(MetadataSearchResult(id='%s|%d|%s|%d' % (curID, siteNum, releaseDate, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))
            else:
                results.Append(MetadataSearchResult(id='%s|%d|%s|%d' % (curID, siteNum, releaseDate, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))

    # Demote tied 80-score results so none of them auto-matches
    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=79, lang=lang))
        else:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=result.score, lang=lang))

    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate metadata for a TeamSkeet-network scene from the site API.

    metadata.id is '<sceneName>|<siteNum>|<date>|<sceneType>'.
    """
    metadata_id = str(metadata.id).split('|')
    sceneName = metadata_id[0]
    sceneDate = metadata_id[2]
    sceneType = metadata_id[3]

    dbURL = getDBURL(PAsearchSites.getSearchBaseURL(siteNum))
    detailsPageElements = getDataFromAPI(dbURL, sceneType, sceneName, siteNum)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements['title'], siteNum)

    # Summary
    metadata.summary = detailsPageElements['description']

    # Studio
    metadata.studio = 'TeamSkeet'

    # Collections / Tagline: fall back to the configured site name when the
    # API response carries no site object
    siteName = detailsPageElements['site']['name'] if 'site' in detailsPageElements else PAsearchSites.getSearchSiteName(siteNum)
    metadata.collections.clear()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres from the API tag list
    if 'tags' in detailsPageElements and detailsPageElements['tags']:
        for tag in detailsPageElements['tags']:
            movieGenres.addGenre(tag.strip())

    # Implicit per-sub-site genres (lookup table instead of a long elif chain)
    implicitGenres = {
        'Sis Loves Me': ('Step Sister',),
        'DadCrush': ('Step Dad', 'Step Daughter'),
        'DaughterSwap': ('Step Dad', 'Step Daughter'),
        'PervMom': ('Step Mom',),
        'Family Strokes': ('Taboo Family',),
        'Foster Tapes': ('Taboo Sex',),
        'BFFs': ('Teen', 'Group Sex'),
        'Shoplyfter': ('Strip',),
        'ShoplyfterMylf': ('Strip', 'MILF'),
        'Exxxtra Small': ('Teen', 'Small T**s'),
        'Little Asians': ('Asian', 'Teen'),
        'TeenJoi': ('Teen', 'JOI'),
        'Black Valley Girls': ('Teen', 'Ebony'),
        'Thickumz': ('Thick',),
        'Dyked': ('Hardcore', 'Teen', 'Lesbian'),
        'Teens Love Black Cocks': ('Teens', 'BBC'),
        'Teen Curves': ('Big Ass',),
        'Titty Attack': ('Big T**s',),
        'Teeny Black': ('Teen', 'Ebony'),
        'Teens Do P**n': ('Teen',),
        'Teen Pies': ('Teen', 'Creampie'),
        'POV Life': ('POV',),
        'Ginger Patch': ('Redhead',),
        'Innocent High': ('School Girl',),
        'Oye Loca': ('Latina',),
    }
    for genreName in implicitGenres.get(siteName, ()):
        movieGenres.addGenre(genreName)

    # Actors: each model needs a follow-up API call for name and photo
    movieActors.clearActors()
    for model in detailsPageElements['models']:
        actorData = getDataFromAPI(dbURL, 'modelscontent', model['modelId'], siteNum)
        if actorData:
            movieActors.addActor(actorData['name'], actorData['img'])

    # Posters: the API exposes a single cover image
    art = [
        detailsPageElements['img']
    ]
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            imageResponse = PAutils.HTTPRequest(posterUrl)
            imageBuffer = StringIO(imageResponse.content)
            width, height = Image.open(imageBuffer).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(imageResponse.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(imageResponse.content, sort_order=idx)
        except:
            pass

    return metadata
def getSearchSettings(mediaTitle):
    """Derive site number, site name, search title and search date from a
    raw media title.

    Expands a known site abbreviation first, then strips the matched site
    name from the front of the title and extracts an embedded release date
    (YYYY MM DD or YY MM DD). Returns a dict with keys 'siteNum',
    'siteName', 'searchTitle', 'searchDate' (all None when no site matches).
    """
    Log('mediaTitle w/ possible abbreviation: %s' % mediaTitle)
    # Expand the first matching site abbreviation at the start of the title
    for abbreviation, full in PAsiteList.abbreviations:
        r = re.compile(abbreviation, flags=re.IGNORECASE)
        if r.match(mediaTitle):
            mediaTitle = r.sub(full, mediaTitle, 1)
            break
    Log('mediaTitle w/ possible abbrieviation fixed: %s' % mediaTitle)

    result = {
        'siteNum': None,
        'siteName': None,
        'searchTitle': None,
        'searchDate': None,
    }

    # Remove Site from Title
    siteNum = getSiteNumByFilter(mediaTitle)
    if siteNum is not None:
        Log('^^^^^^^ siteNum: %d' % siteNum)
        Log('^^^^^^^ Shortening Title')
        title = mediaTitle
        site = getSearchSiteName(siteNum).lower()
        # Compare with punctuation/spacing stripped out.
        # NOTE: `site` keeps only \w characters, so using it as a regex
        # pattern below is safe without re.escape.
        title = re.sub(r'[^a-zA-Z0-9#& ]', ' ', title)
        site = re.sub(r'\W', '', site)
        matched = False
        # Collapse spaces one at a time until the site name lines up at the front
        while (' ' in title):
            title = title.replace(' ', '', 1)
            if title.lower().startswith(site):
                matched = True
                break
        if matched:
            searchTitle = re.sub(site, '', title, 1, flags=re.IGNORECASE)
            searchTitle = ' '.join(searchTitle.split())
        else:
            searchTitle = mediaTitle
        searchTitle = PAutils.parseTitle(searchTitle, siteNum)
        Log('Search Title (before date processing): %s' % searchTitle)

        # Search Type: pull an embedded date out of the title, if any
        searchDate = None
        regex = [(r'\b\d{4} \d{2} \d{2}\b', '%Y %m %d'), (r'\b\d{2} \d{2} \d{2}\b', '%y %m %d')]
        date_obj = None
        for r, dateFormat in regex:
            date = re.search(r, searchTitle)
            if date:
                try:
                    date_obj = datetime.strptime(date.group(), dateFormat)
                except:
                    pass
                if date_obj:
                    searchDate = date_obj.strftime('%Y-%m-%d')
                    searchTitle = ' '.join(re.sub(r, '', searchTitle, 1).split())
                    break

        # FIX: guard against an empty title — the original indexed
        # searchTitle[0] unconditionally and raised IndexError when
        # site-name/date stripping consumed the whole string.
        if searchTitle:
            searchTitle = searchTitle[0].upper() + searchTitle[1:]

        result['siteNum'] = siteNum
        result['siteName'] = site
        result['searchTitle'] = searchTitle
        result['searchDate'] = searchDate

    return result
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate movie metadata from a Data18 empirestores movie page.

    metadata.id is '<encodedURL>|<siteNum>|<date>[|<sceneNum>]'; a 4th part
    marks a split scene of a multi-scene release.
    """
    splitScene = False
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)
    if len(metadata_id) > 3:
        Log('Split Scene: %d' % int(metadata_id[3]))
        splitScene = True

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//h1/text()')[0], siteNum).strip()
    if splitScene:
        metadata.title = '%s [Scene %s]' % (metadata.title, metadata_id[3])

    # Summary
    summary = ''
    try:
        summary = '\n'.join([line.text_content().strip() for line in detailsPageElements.xpath('//div[@class="product-details-container"]/div[@class="row breakout bg-lightgrey"]//h4/p')])
    except:
        pass
    metadata.summary = summary

    # Studio
    try:
        studio = detailsPageElements.xpath('//ul[@class="list-unstyled m-b-2"]/li[contains(., "Studio:")]/a/text()')[0].strip()
    except:
        studio = ''
    if studio:
        metadata.studio = studio

    # Tagline and Collection(s): quoted series name with any parenthetical removed
    metadata.collections.clear()
    tagline = ''
    try:
        tagline = re.sub(r'\(.*\)', '', detailsPageElements.xpath('//div[@class="container"]/h2/a[@label="Series"]/text()')[0].strip().split('"')[1]).strip()
        metadata.tagline = tagline
        metadata.collections.add(tagline)
    except:
        # No series: group split scenes by movie title, otherwise by studio
        if splitScene:
            metadata.collections.add(PAutils.parseTitle(detailsPageElements.xpath('//h1/text()')[0], siteNum).strip())
        else:
            metadata.collections.add(studio)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="col-sm-4 m-b-2"]/ul/li//a[@label="Category"]'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors: per-scene cast for split scenes, full cast otherwise
    movieActors.clearActors()
    actors = []
    if splitScene:
        actorNames = detailsPageElements.xpath('//div[@class="container"]/div[@class="row"][./div[@class="col-sm-6 text-right text-left-xs m-b-1"]][%d]/div[2]/div/a' % int(metadata_id[3]))
        for name in actorNames:
            try:
                actors.append(name)
            except:
                pass
    else:
        actors = detailsPageElements.xpath('//div[@class="col-sm-4 m-b-2"]/ul/li/a[@label="Performers - detail"]')
    for actorLink in actors:
        actorName = actorLink.text_content().strip()
        try:
            actorPhotoURL = detailsPageElements.xpath('//div[@class="itempage"]/div/div[@class="row"]/div[@class="col-sm-3 col-md-4 col-lg-3 m-b-2"]/div/a[@label="Performer"][contains(., "%s")]//img/@src' % actorName)[0].strip()
        except:
            # Performers without a matching photo tile are skipped entirely
            continue
        if actorName:
            movieActors.addActor(actorName, actorPhotoURL)

    # Posters: per-scene stills (for split scenes) plus the box cover
    art = []
    cover = '//div[@class="boxcover-container"]/a/img/@src'
    splitscenes = ''
    if splitScene:
        splitscenes = '//div[@class="product-details-container"]/div[@class="container"]/div[@class="row"][./div[@class="col-sm-9 col-md-10"]][%d]/div[@class="col-sm-9 col-md-10"]/div/div/a/@href' % int(metadata_id[3])
    try:
        if splitScene:
            art = art + detailsPageElements.xpath(splitscenes)
        art.append(detailsPageElements.xpath(cover)[0])
    except:
        pass

    images = []
    posterExists = False
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.data18.empirestores.co'})
                # Keep the URL with its payload: the original fallback loop
                # indexed `art[idx - 1]` with the position in the *filtered*
                # `images` list, mis-associating URLs whenever a poster was
                # skipped as already existing.
                images.append((posterUrl, image))
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                posterExists = True
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    # Fallback: no download succeeded above, retry the cached payloads as posters
    if not posterExists:
        for idx, (posterUrl, image) in enumerate(images, 1):
            try:
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def search(results, lang, siteNum, searchData):
    """Search a bilingual (Spanish/English) site via Google results only.

    URLs are normalized ('index.php/' and 'es/' stripped), and for every
    English ('/en/') hit the Spanish counterpart is also queued so both
    language variants are offered.
    """
    searchResults = []
    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum, lang='enes')
    for sceneURL in googleResults:
        sceneURL = sceneURL.replace('index.php/', '')
        sceneURL = sceneURL.replace('es/', '')
        # Skip tag/actor/pagination/embed pages and duplicates
        if '/tags/' not in sceneURL and '/actr' not in sceneURL and '?pag' not in sceneURL and '/xvideos' not in sceneURL and '/tag/' not in sceneURL and sceneURL not in searchResults:
            searchResults.append(sceneURL)
            if '/en/' in sceneURL:
                # Also offer the Spanish-language variant of the page
                searchResults.append(sceneURL.replace('en/', ''))

    for sceneURL in searchResults:
        req = PAutils.HTTPRequest(sceneURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        # Broad try: any candidate page that fails to parse is skipped silently
        try:
            # Title from the <title> tag, before the first '|' or '-'
            if '/en/' in sceneURL:
                language = 'English'
                titleNoFormatting = PAutils.parseTitle(detailsPageElements.xpath('//title')[0].text_content().split('|')[0].split('-')[0].strip(), siteNum)
            else:
                language = 'Español'
                titleNoFormatting = detailsPageElements.xpath('//title')[0].text_content().split('|')[0].split('-')[0].strip()
            curID = PAutils.Encode(sceneURL)

            # Release date shown as DD/MM/YYYY
            date = detailsPageElements.xpath('//div[@class="released-views"]/span')[0].text_content().strip()
            if date:
                releaseDate = datetime.strptime(date, '%d/%m/%Y').strftime('%Y-%m-%d')
            else:
                releaseDate = searchData.dateFormat() if searchData.date else ''
            displayDate = releaseDate if date else ''

            # Score by date when both sides have one, else by title distance
            if searchData.date and displayDate:
                score = 100 - Util.LevenshteinDistance(searchData.date, releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

            results.Append(MetadataSearchResult(id='%s|%d' % (curID, siteNum), name='%s {%s} [%s] %s' % (titleNoFormatting, language, PAsearchSites.getSearchSiteName(siteNum), displayDate), score=score, lang=lang))
        except:
            pass

    return results
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Populate scene metadata from the site's GraphQL API.

    The agent id is '<encodedVideoId>|<siteNum>|<date>' where the decoded
    video id has the shape '<modelId>:...:scene<N>'.  Image URLs are
    resolved through the asset batch query; presumably they carry a
    signed token/timestamp in the query string — TODO confirm — which is
    stripped before the URLs are stored as keys.
    """
    metadata_id = str(metadata.id).split('|')
    videoId = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    splitted = videoId.split(':')
    modelId = splitted[0]
    scene = splitted[-1]        # e.g. 'scene3'
    sceneNum = int(scene.replace('scene', ''))
    detailsPageElements = getGraphQL(findVideoQuery, 'videoId', videoId, siteNum)['video']['find']['result']

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements['title'], siteNum)

    # Summary (ensure it ends with a period)
    summary = detailsPageElements['description']['long'].strip()
    if not summary.endswith('.'):
        summary = summary + '.'
    metadata.summary = summary

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres: fixed per-site list looked up in genresDB by site name
    movieGenres.clearGenres()
    for key, value in genresDB.items():
        if key.lower() == PAsearchSites.getSearchSiteName(siteNum).lower():
            for genreName in value:
                movieGenres.addGenre(genreName)
            break

    # Actors: resolve each talent's profile thumbnail via the asset query
    movieActors.clearActors()
    for actorLink in detailsPageElements['talent']:
        actorPhoto = []
        actorName = actorLink['talent']['name']
        actorPhoto.append('/members/models/%s/profile-sm.jpg' % actorLink['talent']['talentId'])
        actorPhotoURL = getGraphQL(assetQuery, 'paths', actorPhoto, siteNum)['asset']['batch']['result'][0]['serve']['uri']
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: the video thumb plus every gallery photo, resolved in one batch
    images = []
    images.append('/members/models/%s/scenes/%s/videothumb.jpg' % (modelId, scene))
    for idx in range(1, detailsPageElements['galleryCount'] + 1):
        path = '/members/models/%s/scenes/%s/photos/thumbs/%s-%s-%d-%d.jpg' % (modelId, scene, PAsearchSites.getSearchSiteName(siteNum).lower(), modelId, sceneNum, idx)
        images.append(path)
    posters = getGraphQL(assetQuery, 'paths', images, siteNum)['asset']['batch']['result']
    for poster in posters:
        if poster:
            art.append(poster['serve']['uri'])

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        # Remove Timestamp and Token from URL
        cleanUrl = posterUrl.split('?')[0]
        art[idx - 1] = cleanUrl
        if not PAsearchSites.posterAlreadyExists(cleanUrl, metadata):
            # Download image file for analysis (original URL keeps its token)
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection:
                # portrait -> poster, landscape -> art (squares are dropped)
                if height > width:
                    metadata.posters[cleanUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > height:
                    metadata.art[cleanUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Scrape a scene page (members or '/webmasters' variant) into metadata.

    Bug fix: the agent id is '<encodedURL>|<siteNum>[|<date>]' with the
    date optional.  The old code bound `sceneDate` inside
    `try/except: pass`, so when no date element was present the later
    `except: date = sceneDate` fallback raised NameError from inside the
    handler.  `sceneDate` is now always bound (defaulting to '').
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    # Always bind sceneDate; it is used as a fallback in the date lookup below
    sceneDate = metadata_id[2] if len(metadata_id) > 2 else ''

    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title: the heading starts with the numeric scene id, which is split off
    if 'webmasters' in sceneURL:
        resultTitleID = detailsPageElements.xpath('//h1/text()')[0]
    else:
        resultTitleID = detailsPageElements.xpath('//h4/span')[0].text_content()
    sceneID = re.sub(r'\D.*', '', resultTitleID)  # leading digits only
    metadata.title = PAutils.parseTitle(re.sub(r'^\d+', '', resultTitleID), siteNum)

    # Summary
    try:
        if 'webmasters' in sceneURL:
            metadata.summary = detailsPageElements.xpath('//div[@class="row gallery-description"]//div')[1].text_content().strip()
        else:
            metadata.summary = detailsPageElements.xpath('//div[@class="row"]//a/@title')[0].strip()
    except:
        pass

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Actors ('spam' matches the site's actual markup, not a typo here)
    if 'webmasters' in sceneURL:
        actors = detailsPageElements.xpath('//spam[@class="key-words"]//a')
    else:
        actors = detailsPageElements.xpath('//h5//a')
    actorPhotoURL = ''

    # Remove Actor Names from Genre List
    genres = detailsPageElements.xpath('//meta[@name="keywords"]/@content')[0].replace('Aussie Ass', '')
    genres = re.sub(r'id.\d*', '', genres, flags=re.IGNORECASE)

    actorsPageElements = None  # last fetched model page; reused for the date lookup
    if actors:
        for actorLink in actors:
            actorName = actorLink.text_content().title()
            genres = genres.replace(actorName, '')
            modelURL = actorLink.xpath('./@href')[0]
            req = PAutils.HTTPRequest(modelURL)
            actorsPageElements = HTML.ElementFromString(req.text)
            img = actorsPageElements.xpath('//img[contains(@id, "set-target")]/@src')[0]
            if img:
                actorPhotoURL = img
            if 'http' not in actorPhotoURL:
                actorPhotoURL = PAsearchSites.getSearchBaseURL(siteNum) + actorPhotoURL
            movieActors.addActor(actorName, actorPhotoURL)

    # Date: webmasters pages carry no date, so walk the model's scene listing
    date = ''
    try:
        if 'webmasters' in sceneURL:
            pageResults = int(actorsPageElements.xpath('//span[@class="number_item "]')[0].text_content().strip())
            if not pageResults:
                pageResults = 1
            for x in range(pageResults):
                if x == 1:
                    # Advance to the model's second listing page
                    req = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + actorsPageElements.xpath('//a[contains(@class, "in_stditem")]/@href')[1])
                    actorsPageElements = HTML.ElementFromString(req.text)
                for sceneElements in actorsPageElements.xpath('//div[@class="box"]'):
                    if sceneID in sceneElements.xpath('.//a/text()')[1]:
                        date = actorsPageElements.xpath('.//span[@class="video-date"]')[0].text_content().strip()
                        break
        else:
            date = sceneDate
    except:
        date = sceneDate

    if date:
        date = parse(date).strftime('%d-%m-%Y')
        date_object = datetime.strptime(date, '%d-%m-%Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres (keywords meta, actor names already stripped out above)
    for genreLink in genres.split(','):
        genreName = genreLink.strip()
        movieGenres.addGenre(genreName)

    # Posters
    xpaths = [
        '//img[contains(@alt, "content")]/@src',
        '//div[@class="box"]//img/@src',
    ]
    altURL = ""
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if 'http' not in img:
                if 'join' in img:
                    break  # promo/join banner images — skip the rest of this xpath
                elif 'webmasters' in sceneURL:
                    img = sceneURL + "/" + img
                else:
                    img = PAsearchSites.getSearchBaseURL(siteNum) + img
            art.append(img)
    if 'webmasters' not in sceneURL:
        altURL = PAsearchSites.getSearchBaseURL(siteNum) + "/webmasters/" + sceneID
        req = PAutils.HTTPRequest(altURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        sceneURL = altURL

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1 or height > width:  # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100 and width > height:  # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Scrape a Deranged Dollars scene page into metadata.

    Cast is not structured on the scene page, so names are extracted from
    a free-text caption and then fuzzily matched against the two model
    listing pages to recover canonical names and headshots.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//h3[@class="mas_title"]')[0].text_content().strip(), siteNum)

    # Summary
    metadata.summary = detailsPageElements.xpath('//p[@class="mas_longdescription"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'Deranged Dollars'

    # Tagline and Collection(s): site name from the <title> tag, e.g. 'X | Site.com'
    metadata.collections.clear()
    tagline = detailsPageElements.xpath('//title')[0].text_content().split('|')[1].strip().replace('.com', '')
    metadata.tagline = tagline
    metadata.collections.add(metadata.tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//p[@class="tags"]/a'):
        genreName = PAutils.parseTitle(genreLink.text_content().strip(), siteNum)
        movieGenres.addGenre(genreName)

    # Actors: caption text like 'Starring: A, B & C', trailing clauses dropped
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//div[@class="lch"]/span')[0].text_content().rsplit(',', 2)[0]
    # NOTE(review): the bare 'And' alternative also splits inside names
    # such as 'Andy' — confirm against real captions before changing.
    if ':' in actors:
        actors = re.split(',|&|/|And', actors.split(':', 1)[1])
    else:
        actors = re.split(',|&|/|And', actors)

    # Fetch both model listing pages once; matched below per actor
    modelURL = PAsearchSites.getSearchSearchURL(siteNum) + '?models'
    req = PAutils.HTTPRequest(modelURL)
    modelPageElements = HTML.ElementFromString(req.text)
    models = modelPageElements.xpath('//div[@class="item"]')
    modelURL = PAsearchSites.getSearchSearchURL(siteNum) + '?models/2'
    req = PAutils.HTTPRequest(modelURL)
    modelPageElements = HTML.ElementFromString(req.text)
    models += modelPageElements.xpath('//div[@class="item"]')

    for actorLink in actors:
        actorName = actorLink.strip()
        # Strip punctuation and role words before matching
        actorName = re.sub(r'\W', ' ', actorName).replace('Nurses', '').replace('Nurse', '')
        actorPhotoURL = ''
        # First listing entry that contains the name wins; entries may be
        # prefixed 'Role: Name', in which case the part after ':' is used
        for model in models:
            if ':' in model.text_content().strip():
                if actorName in model.text_content().split(':', 1)[1].strip():
                    actorName = model.text_content().split(':', 1)[1].strip()
                    actorPhotoURL = PAsearchSites.getSearchSearchURL(siteNum) + model.xpath('.//@src')[0]
                    break
            else:
                if actorName in model.text_content().strip():
                    actorName = model.text_content().strip()
                    actorPhotoURL = PAsearchSites.getSearchSearchURL(siteNum) + model.xpath('.//@src')[0]
                    break
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: still frames plus an image URL quoted inside the player script
    art = []
    xpaths = [
        '//div[@class="stills clearfix"]//img/@src',
        '//div[@class="mainpic"]//script/text()',
    ]
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if '\'' in img:
                # Script text: take the first single-quoted string
                img = img.split('\'')[1]
            if 'http' not in img:
                img = PAsearchSites.getSearchSearchURL(siteNum) + img
            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:  # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:  # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Search the site, then Google, scoring candidates for data18 content.

    Scoring: an exact numeric scene-id match scores 100; otherwise the
    base is 80 minus a Levenshtein distance on date or title.  Results
    that land exactly on 80 are held back in `temp` and demoted to 79 at
    the end when more than one such tie exists.  (Python 2: `unicode` is
    used for the digit test.)
    """
    searchResults = []
    siteResults = []
    temp = []       # ties at score 80, resolved after both passes
    count = 0       # number of 80-score ties seen
    sceneID = None

    # A leading number (> 100) in the query is treated as a direct scene id
    splited = searchTitle.split(' ')
    if unicode(splited[0], 'UTF-8').isdigit():
        sceneID = splited[0]
        if int(sceneID) > 100:
            searchTitle = searchTitle.replace(sceneID, '', 1).strip()
            sceneURL = '%s/content/%s' % (PAsearchSites.getSearchBaseURL(siteNum), sceneID)
            searchResults.append(sceneURL)

    encodedTitle = searchTitle.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum), encodedTitle)
    req = PAutils.HTTPRequest(searchURL, headers={'Referer': 'http://www.data18.com'})
    searchPageElements = HTML.ElementFromString(req.text)

    # Pass 1: the site's own search results page
    for searchResult in searchPageElements.xpath('//p[@class="genmed"]//parent::div'):
        sceneURL = searchResult.xpath('.//*[contains(@href, "content")]/@href')[0]
        if sceneURL not in searchResults:
            urlID = re.sub(r'.*/', '', sceneURL)
            # Network > Studio > '' fallback chain for the display name
            try:
                siteName = searchResult.xpath('.//*[contains(., "Network")]')[0].text_content().replace('Network:', '').strip()
            except:
                try:
                    siteName = searchResult.xpath('.//*[contains(., "Studio")]')[0].text_content().replace('Studio:', '').strip()
                except:
                    siteName = ''
            try:
                subSite = searchResult.xpath('.//p[@class][contains(., "Site:")]')[0].text_content().replace('Site:', '').strip()
            except:
                subSite = ''
            if siteName:
                siteDisplay = '%s/%s' % (siteName, subSite) if subSite else siteName
            else:
                siteDisplay = subSite
            titleNoFormatting = PAutils.parseTitle(searchResult.xpath('.//*[contains(@href, "content")]')[1].text_content(), siteNum)
            curID = PAutils.Encode(sceneURL)
            siteResults.append(sceneURL)
            try:
                date = searchResult.xpath('.//p[@class="genmed"]')[0].text_content().strip()
                date = re.sub(r'^#(.*?)\s', '', date)  # drop leading '#id ' prefix
            except:
                date = ''
            if date and not date == 'unknown':
                # 'Sept' abbreviation is not parseable; normalize to 'Sep'
                date = date.replace('Sept', 'Sep')
                releaseDate = parse(date).strftime('%Y-%m-%d')
            else:
                releaseDate = parse(searchDate).strftime('%Y-%m-%d') if searchDate else ''
            displayDate = releaseDate if date else ''
            if sceneID == urlID:
                score = 100
            elif searchDate and displayDate:
                score = 80 - Util.LevenshteinDistance(searchDate, releaseDate)
            else:
                score = 80 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
            if score == 80:
                count += 1
                temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, siteDisplay, displayDate), score=score, lang=lang))
            else:
                results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, siteDisplay, displayDate), score=score, lang=lang))

    # Pass 2: Google hits not already covered by the site search
    googleResults = PAutils.getFromGoogleSearch(searchTitle, siteNum)
    for sceneURL in googleResults:
        if ('/content/' in sceneURL and '.html' not in sceneURL and sceneURL not in searchResults and sceneURL not in siteResults):
            searchResults.append(sceneURL)

    for sceneURL in searchResults:
        req = PAutils.HTTPRequest(sceneURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', sceneURL)
        try:
            siteName = detailsPageElements.xpath('//i[contains(., "Network")]//preceding-sibling::a[1]')[0].text_content().strip()
        except:
            try:
                siteName = detailsPageElements.xpath('//i[contains(., "Studio")]//preceding-sibling::a[1]')[0].text_content().strip()
            except:
                siteName = ''
        try:
            subSite = detailsPageElements.xpath('//i[contains(., "Site")]//preceding-sibling::a[1]')[0].text_content().strip()
        except:
            subSite = ''
        if siteName:
            siteDisplay = '%s/%s' % (siteName, subSite) if subSite else siteName
        else:
            siteDisplay = subSite
        titleNoFormatting = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content(), siteNum)
        curID = PAutils.Encode(sceneURL)
        try:
            date = detailsPageElements.xpath('//span[@class][./*[contains(.., "date")]]')[0].text_content().split(':', 2)[-1].strip()
        except:
            date = ''
        if date and not date == 'unknown':
            releaseDate = parse(date).strftime('%Y-%m-%d')
        else:
            releaseDate = parse(searchDate).strftime('%Y-%m-%d') if searchDate else ''
        displayDate = releaseDate if date else ''
        if sceneID == urlID:
            score = 100
        elif searchDate and displayDate:
            score = 80 - Util.LevenshteinDistance(searchDate, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())
        if score == 80:
            count += 1
            temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, siteDisplay, displayDate), score=score, lang=lang))
        else:
            results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, siteDisplay, displayDate), score=score, lang=lang))

    # Flush held-back ties: demote to 79 only when there was more than one
    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=79, lang=lang))
        else:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=result.score, lang=lang))

    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a data18-style movie page into metadata.

    Artwork is classified by aspect ratio; if no portrait poster is
    found at all, a second pass over the already-downloaded images
    promotes them to posters regardless of orientation.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content(), siteNum)

    # Summary
    try:
        metadata.summary = detailsPageElements.xpath('//div[@class="gen12"]/p[contains(., "Story")]')[0].text_content().split('\n', 2)[-1]
    except:
        pass

    # Studio: Network preferred, Studio as fallback
    try:
        metadata.studio = detailsPageElements.xpath('//i[contains(., "Network")]//preceding-sibling::a[1]')[0].text_content().strip()
    except:
        try:
            metadata.studio = detailsPageElements.xpath('//i[contains(., "Studio")]//preceding-sibling::a[1]')[0].text_content().strip()
        except:
            pass

    # Tagline and Collection(s): 'Site' label fills studio if still empty,
    # otherwise becomes the tagline; either way it names the collection
    metadata.collections.clear()
    try:
        tagline = detailsPageElements.xpath('//i[contains(., "Site")]//preceding-sibling::a[1]')[0].text_content().strip()
        if not metadata.studio:
            metadata.studio = tagline
        else:
            metadata.tagline = tagline
        metadata.collections.add(tagline)
    except:
        metadata.collections.add(metadata.studio)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[./b[contains(., "Categories")]]//a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors (no photos available from this page)
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//p[contains(., "Starring")]//following-sibling::a[1]')
    if actors:
        for actorLink in actors:
            actorName = actorLink.text_content().strip()
            actorPhotoURL = ''
            movieActors.addActor(actorName, actorPhotoURL)

    # Posters: 'th8' thumbnails on the viewer page, full size via path strip
    art = []
    xpaths = [
        '//img[contains(@src, "th8")]/@src',
    ]
    try:
        req = PAutils.HTTPRequest(detailsPageElements.xpath('//@href[contains(., "viewer")]')[0])
        photoPageElements = HTML.ElementFromString(req.text)
        for xpath in xpaths:
            for img in photoPageElements.xpath(xpath):
                art.append(img.replace('/th8', ''))
    except:
        pass
    try:
        img = detailsPageElements.xpath('//div[@id="moviewrap"]//@src')[0]
        art.append(img)
    except:
        pass

    images = []          # keep downloads for the fallback pass below
    posterExists = False
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.data18.com'})
                images.append(image)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > width:  # Item is a poster
                    posterExists = True
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > height:  # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    # Fallback: no portrait image at all — use any downloaded image as poster
    if not posterExists:
        for idx, image in enumerate(images, 1):
            try:
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:  # Item is a poster
                    metadata.posters[art[idx - 1]] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Scrape an AnalVids watch page into metadata.

    Fixes vs previous revision:
    - gallery thumbnails were appended twice (once with the query string
      stripped, once raw), so every image was downloaded and processed
      twice; only the clean URL is kept now
    - multiple credited directors all overwrote a single `directors.new()`
      entry, so only the last name survived; each director now gets its
      own entry (and the list is cleared first, matching the other sites)
    """
    metadata_id = metadata.id.split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//h1[@class="watchpage-title"]')[0].text_content().strip(), siteNum)

    # Studio
    metadata.studio = 'AnalVids'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = detailsPageElements.xpath('//a[@class="watchpage-studioname"]/text()')[0].strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    releaseDate = detailsPageElements.xpath('//span[@class="scene-description__detail"]//a/text()')[0]
    date_object = parse(releaseDate)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    genres = detailsPageElements.xpath('//dd/a[contains(@href, "/niche/")]')
    for genreLink in genres:
        genreName = genreLink.text_content().title()
        movieGenres.addGenre(genreName)

    # Actors: each model page is fetched for a headshot
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//dd/a[contains(@href, "model") and not(contains(@href, "forum"))]')
    for actorLink in actors:
        actorName = actorLink.text_content()
        actorPageURL = actorLink.get('href')
        req = PAutils.HTTPRequest(actorPageURL)
        actorPage = HTML.ElementFromString(req.text)
        actorPhotoURL = actorPage.xpath('//div[@class="model--avatar"]//img/@src')[0]
        movieActors.addActor(actorName, actorPhotoURL)

    # Director: these two studios imply Giorgio Grandi; page credits are added too
    directorNames = []
    if tagline == 'Giorgio Grandi' or tagline == 'Giorgio\'s Lab':
        directorNames.append('Giorgio Grandi')
    try:
        for dirname in detailsPageElements.xpath('//p[@class="director"]/a'):
            directorName = dirname.text_content().strip()
            if directorName and directorName not in directorNames:
                directorNames.append(directorName)
    except:
        pass
    metadata.directors.clear()
    for directorName in directorNames:
        director = metadata.directors.new()
        director.name = directorName

    # Posters/Background: player background image plus gallery thumbnails
    art.append(detailsPageElements.xpath('//div[@id="player"]/@style')[0].split('url(')[1].split(')')[0])
    for img in detailsPageElements.xpath('//div[contains(@class, "thumbs2 gallery")]//img/@src'):
        art.append(img.split('?')[0])  # strip cache-buster query string

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:  # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:  # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def search(results, lang, siteNum, searchData):
    """Search the empire-store catalog, including per-scene split results.

    A leading number > 100 in the query is treated as a direct movie id.
    A trailing 'Scene N' suffix switches to split-scene matching: the
    matching scene of the matching movie is returned with score 100 and
    the loop stops.  Score-80 ties are buffered in `temp` and demoted to
    79 at the end when more than one exists.  (Python 2: `unicode` digit
    test; `sceneCount` relies on integer division.)
    """
    searchResults = []
    siteResults = []
    temp = []               # ties at score 80, resolved after all passes
    directID = False
    count = 0
    scene_matched = False
    sceneID = None

    parts = searchData.title.split()
    if unicode(parts[0], 'UTF-8').isdigit():
        sceneID = parts[0]
        if int(sceneID) > 100:
            searchData.title = searchData.title.replace(sceneID, '', 1).strip()
            movieURL = '%s/%s' % (PAsearchSites.getSearchBaseURL(siteNum), sceneID)
            searchResults.append(movieURL)
            directID = True

    # 'Title Scene N' -> remember (title, N); N is taken from the last char
    scene_break = re.search(r'.*(?=Scene\s\d)', searchData.title)
    if scene_break:
        scene_break = (scene_break.group().strip(), searchData.title[-1])
        searchData.title = scene_break[0]

    searchData.encoded = searchData.title.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum), searchData.encoded)
    req = PAutils.HTTPRequest(searchURL, headers={'Referer': 'http://www.data18.empirestores.co'})
    searchPageElements = HTML.ElementFromString(req.text)

    if not directID:
        for searchResult in searchPageElements.xpath('//div[@class="product-card"]'):
            if scene_matched:
                break  # the requested split scene was already emitted
            movieURL = '%s%s' % (PAsearchSites.getSearchBaseURL(siteNum), searchResult.xpath('./div[@class="boxcover-container"]/a/@href')[0].strip())
            urlID = searchResult.xpath('./div[@class="boxcover-container"]/a/@href')[0].split("/")[1]
            if movieURL not in searchResults:
                titleNoFormatting = PAutils.parseTitle(searchResult.xpath('./div[@class="product-details"]/div/a/text()')[0].strip(), siteNum)
                curID = PAutils.Encode(movieURL)
                siteResults.append(movieURL)
                # Preliminary score decides whether the detail page is fetched
                if sceneID == urlID:
                    score = 100
                else:
                    score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())
                if score > 70:
                    sceneURL = PAutils.Decode(curID)
                    req = PAutils.HTTPRequest(sceneURL)
                    detailsPageElements = HTML.ElementFromString(req.text)
                    # Find date on movie specific page
                    date = detailsPageElements.xpath('//ul[@class="list-unstyled m-b-2"]/li[contains(., "Released:")]/text()')[0].strip()
                    if date and not date == 'unknown':
                        try:
                            releaseDate = datetime.strptime(date, '%b %d, %Y').strftime('%Y-%m-%d')
                        except:
                            releaseDate = ''
                    else:
                        releaseDate = searchData.dateFormat() if searchData.date else ''
                    displayDate = releaseDate if date else ''
                    # Studio
                    try:
                        studio = detailsPageElements.xpath('//ul[@class="list-unstyled m-b-2"]/li[contains(., "Studio:")]/a/text()')[0].strip()
                    except:
                        studio = ''
                    # Re-score now that the date is known
                    if sceneID == urlID:
                        score = 100
                    elif searchData.date and displayDate:
                        score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
                    else:
                        score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())
                    if score == 80:
                        count += 1
                        temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
                    else:
                        results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
                    # Split Scenes: rows alternate, hence the (n-1)/2 count
                    scenes = detailsPageElements.xpath('//div[@class="product-details-container"]/div[@class="container"]/div[@class="row"]')
                    sceneCount = (len(scenes) - 1) / 2
                    for sceneNum in range(0, sceneCount):
                        section = 'Scene %d' % (sceneNum + 1)  # NOTE(review): unused
                        actorNames = ', '.join(detailsPageElements.xpath('//div[@class="container"]/div[@class="row"][./div[@class="col-sm-6 text-right text-left-xs m-b-1"]][%d]/div[2]/div/a/text()' % (sceneNum + 1)))
                        if scene_break and titleNoFormatting.replace('-', ' ') == scene_break[0] and sceneNum + 1 == int(scene_break[1]):
                            # Exact 'Title Scene N' request: emit and stop searching
                            scene_matched = True
                            results.Append(MetadataSearchResult(id='%s|%d|%s|%d' % (curID, siteNum, releaseDate, sceneNum + 1), name='%s/#%d[%s][%s] %s' % (titleNoFormatting, sceneNum + 1, actorNames, studio, displayDate), score=100, lang=lang))
                            break
                        elif score == 80:
                            count += 1
                            temp.append(MetadataSearchResult(id='%s|%d|%s|%d' % (curID, siteNum, releaseDate, sceneNum + 1), name='%s/#%d[%s][%s] %s' % (titleNoFormatting, sceneNum + 1, actorNames, studio, displayDate), score=score, lang=lang))
                        else:
                            results.Append(MetadataSearchResult(id='%s|%d|%s|%d' % (curID, siteNum, releaseDate, sceneNum + 1), name='%s/#%d[%s][%s] %s' % (titleNoFormatting, sceneNum + 1, actorNames, studio, displayDate), score=score, lang=lang))
                else:
                    # NOTE(review): releaseDate is only assigned in the
                    # score > 70 branch — if the first candidate lands here,
                    # this references an unbound name; confirm and fix.
                    if score == 80:
                        count += 1
                        temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s' % (titleNoFormatting), score=score, lang=lang))
                    else:
                        results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s' % (titleNoFormatting), score=score, lang=lang))

    # Google pass over movie URLs not already seen
    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for movieURL in googleResults:
        if ('/movies/' in movieURL and '.html' not in movieURL and movieURL not in searchResults and movieURL not in siteResults):
            searchResults.append(movieURL)

    for movieURL in searchResults:
        req = PAutils.HTTPRequest(movieURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', movieURL)
        titleNoFormatting = PAutils.parseTitle(detailsPageElements.xpath('//h1/text()')[0].strip(), siteNum)
        curID = PAutils.Encode(movieURL)
        date = detailsPageElements.xpath('//ul[@class="list-unstyled m-b-2"]/li[contains(., "Released:")]/text()')[0].strip()
        if date and not date == 'unknown':
            try:
                releaseDate = datetime.strptime(date, '%b %d %Y').strftime('%Y-%m-%d')
            except:
                releaseDate = ''
        else:
            releaseDate = searchData.dateFormat() if searchData.date else ''
        displayDate = releaseDate if date else ''
        if sceneID == urlID:
            score = 100
        elif searchData.date and displayDate:
            score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())
        # Studio
        try:
            studio = detailsPageElements.xpath('//ul[@class="list-unstyled m-b-2"]/li[contains(., "Studio:")]/a/text()')[0].strip()
        except:
            studio = ''
        if score == 80:
            count += 1
            temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
        else:
            results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
        # Split Scenes
        scenes = detailsPageElements.xpath('//div[@class="product-details-container"]/div[@class="container"]/div[@class="row"]')
        sceneCount = (len(scenes) - 1) / 2
        for sceneNum in range(0, sceneCount):
            actorNames = ', '.join(detailsPageElements.xpath('//div[@class="container"]/div[@class="row"][./div[@class="col-sm-6 text-right text-left-xs m-b-1"]][%d]/div[2]/div/a/text()' % (sceneNum + 1)))
            if score == 80:
                count += 1
                temp.append(MetadataSearchResult(id='%s|%d|%s|%d' % (curID, siteNum, releaseDate, sceneNum + 1), name='%s/#%d[%s][%s] %s' % (titleNoFormatting, sceneNum + 1, actorNames, studio, displayDate), score=score, lang=lang))
            else:
                results.Append(MetadataSearchResult(id='%s|%d|%s|%d' % (curID, siteNum, releaseDate, sceneNum + 1), name='%s/#%d[%s][%s] %s' % (titleNoFormatting, sceneNum + 1, actorNames, studio, displayDate), score=score, lang=lang))

    # Flush held-back ties: demote to 79 only when there was more than one
    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=79, lang=lang))
        else:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=result.score, lang=lang))

    return results
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Scrape a New Sensations scene page into metadata.

    Bug fix: `actorPageElements` (the last fetched model page) was used
    unconditionally by the DVD-stills lookup, so a scene listing no
    models crashed with NameError.  It is now pre-initialised and the
    lookup is skipped when no model page was fetched.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//div[@class="update_title"]')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath('//span[@class="update_description"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'New Sensations'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//span[@class="update_tags"]/a'):
        genreName = PAutils.parseTitle(genreLink.text_content().replace('-', '').strip(), siteNum)
        movieGenres.addGenre(genreName)

    # Actors: each model page is fetched for a headshot
    movieActors.clearActors()
    actorPageElements = None  # last model page fetched; reused for DVD stills below
    for actorLink in detailsPageElements.xpath('//span[@class="update_models"]/a'):
        actorName = actorLink.text_content().strip()
        modelURL = actorLink.xpath('.//@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        actorPageElements = HTML.ElementFromString(req.text)
        actorPhotoURL = actorPageElements.xpath('//div[@class="cell_top cell_thumb"]/img/@src0_1x')[0]
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: player overlay image(s)
    xpaths = [
        '//div[@class="mejs-layers"]//img/@src',
    ]
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            art.append(img)

    # Extra stills: scan the (last) model page's DVD listing for this title
    if actorPageElements is not None:
        for scene in actorPageElements.xpath('//div[@class="table dvd_info"]'):
            resultTitle = scene.xpath('.//div[@class="update_title"]')[0].text_content()
            if resultTitle.lower() == metadata.title.lower():
                for img in scene.xpath('.//div[@class="cell"]//@src0_3x'):
                    art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:  # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:  # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate Plex metadata for a scene on a site using `src0_3x` image attrs.

    ``metadata.id`` is a '|'-separated blob: encoded scene URL, an unused
    field, then the scene date.  Scrapes the scene page, follows each model
    link for an actor photo, extracts artwork URLs (including ones embedded
    in inline <script> text), then downloads and proxies the images.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//h3')[0].text_content().strip()

    # Summary: join all description text nodes; the site injects a
    # 'FULL VIDEO' marker which is stripped out.
    metadata.summary = ' '.join(
        detailsPageElements.xpath(
            '//div[@class="videoDetails clear"]//p/span//text()')).replace(
                'FULL VIDEO', '')

    # Tagline and Collection(s): site name doubles as studio; adding it as a
    # collection is gated on the user preference.
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.studio = tagline
    if Prefs['collections_addsitename']:
        metadata.collections.add(tagline)

    # Release Date (only when the search step supplied one)
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//li[contains(., "Tags")]//parent::ul//a'):
        genreName = PAutils.parseTitle(genreLink.text_content().strip(),
                                       siteNum)
        movieGenres.addGenre(genreName)

    # Actors: each entry links to a model page that carries the photo;
    # relative photo URLs are made absolute against the site base URL.
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//li[@class="update_models"]'):
        actorName = actorLink.text_content().strip()
        modelURL = actorLink.xpath('.//@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        actorPageElements = HTML.ElementFromString(req.text)
        actorPhotoURL = actorPageElements.xpath(
            '//div[@class="profile-pic"]//@src0_3x')[0]
        if 'http' not in actorPhotoURL:
            actorPhotoURL = PAsearchSites.getSearchBaseURL(
                siteNum) + actorPhotoURL
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: thumbnails come either as direct src0_3x attributes or
    # embedded inside the player's <script> text, hence the regex pass.
    art = []
    xpaths = [
        '//div[@class="player_thumbs"]//@src0_3x',
        '//div[@class="player full_width"]/script/text()',
    ]
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            search = re.search(r'(?<=src0_3x=").*?(?=")', img)
            if search:
                img = search.group(0)
            if 'http' not in img:
                img = PAsearchSites.getSearchBaseURL(siteNum) + img
            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate Plex metadata for a scene on a Spanish-language site.

    The site serves both English ('/en/') and Spanish pages; several steps
    branch on which variant the scene URL points at.  Actor names are
    recovered from the title / site-name text and corrected against the
    site's '/actrices/<initial>' model index pages.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title: taken from <title>, trimmed at '|' and '-'; English pages get
    # the extra parseTitle normalization.
    if '/en/' in sceneURL:
        metadata.title = PAutils.parseTitle(
            detailsPageElements.xpath('//title')[0].text_content().split('|')
            [0].split('-')[0].strip(), siteNum)
    else:
        metadata.title = detailsPageElements.xpath(
            '//title')[0].text_content().split('|')[0].split('-')[0].strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="description clearfix"]')[0].text_content().split(
            ':')[-1].strip().replace('\n', ' ')

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.studio = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="categories"]/a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors: names come from the title when it contains '&'; otherwise the
    # site-name span is split on the language-appropriate conjunction
    # (' and ' for English pages, ' y ' for Spanish ones).
    movieActors.clearActors()
    if '/en/' in sceneURL:
        if '&' in metadata.title:
            actors = metadata.title.split('&')
        else:
            actors = detailsPageElements.xpath(
                '//span[@class="site-name"]')[0].text_content().split(' and ')
    else:
        if '&' in metadata.title:
            actors = metadata.title.split('&')
        else:
            actors = detailsPageElements.xpath(
                '//span[@class="site-name"]')[0].text_content().split(' y ')

    for actorLink in actors:
        actorName = actorLink.strip()
        # First pass: look the TITLE up on the model index keyed by the
        # title's first letter; on a match the whole title is taken as the
        # actor name.  NOTE(review): this deliberately overrides the split
        # name with metadata.title — presumably for scenes named after the
        # performer; confirm against the site before changing.
        modelURL = '%s/actrices/%s' % (PAsearchSites.getSearchBaseURL(siteNum),
                                       metadata.title[0].lower())
        req = PAutils.HTTPRequest(modelURL)
        modelPageElements = HTML.ElementFromString(req.text)
        for model in modelPageElements.xpath(
                '//div[@class="c-boxlist__box--image"]//parent::a'):
            if model.text_content().strip().lower() == metadata.title.lower():
                actorName = metadata.title
                break

        # Manual name corrections for known site inconsistencies.
        # NOTE(review): 'Africat' looks like it could be a typo for 'Africa'
        # — verify against the site's model index before "fixing" it.
        if 'africa' in actorName.lower():
            actorName = 'Africat'
        elif metadata.title == 'MAMADA ARGENTINA':
            actorName = 'Alejandra Argentina'
        elif actorName == 'Alika':
            actorName = 'Alyka'

        # Second pass: resolve the (possibly corrected) name to a photo via
        # the model index keyed by the name's first letter.
        modelURL = '%s/actrices/%s' % (PAsearchSites.getSearchBaseURL(siteNum),
                                       actorName[0].lower())
        req = PAutils.HTTPRequest(modelURL)
        modelPageElements = HTML.ElementFromString(req.text)
        actorPhotoURL = ''
        for model in modelPageElements.xpath(
                '//div[@class="c-boxlist__box--image"]//parent::a'):
            if model.text_content().strip().lower() == actorName.lower():
                actorPhotoURL = model.xpath('.//img/@src')[0].strip()
                break

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: single poster URL scraped out of the player's inline script.
    art = []
    img = detailsPageElements.xpath(
        '//div[@class="top-area-content"]/script')[0].text_content().strip()
    posterImage = re.search(r'(?<=posterImage:\s").*(?=")', img)
    if posterImage:
        img = posterImage.group(0)
        art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Populate Plex metadata for a BangBros scene.

    The per-site tagline is mined from an inline <script> that assigns
    ``siteName``; genres come from the keywords meta tag with the site name
    filtered out.  Artwork URLs accumulate into the shared ``art`` list.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(
        detailsPageElements.xpath('//span[@class="vdetitle"] | //h1')
        [0].text_content().strip(), siteNum)

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//span[@class="vdtx"] | //p[@class="videoDetail"]')[0].text_content(
        ).strip().replace('\n', '')

    # Studio
    metadata.studio = 'BangBros'

    # Tagline and Collection(s): the network site name is embedded as a JS
    # variable assignment, e.g. siteName = '...'.
    metadata.collections.clear()
    tagline = detailsPageElements.xpath(
        '//script[@type="text/javascript"][contains(., "siteName")]'
    )[0].text_content().split('siteName = \'')[-1].split('\'')[0].strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date (only when the search step supplied one)
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres: keyword list, skipping entries that just repeat the site name.
    movieGenres.clearGenres()
    genres = detailsPageElements.xpath(
        '//meta[@http-equiv="keywords"]/@content')[0].split(',')
    for genreLink in genres:
        if tagline.replace(' ', '').lower() not in genreLink.replace(' ', '').lower():
            genreName = genreLink.strip()
            movieGenres.addGenre(genreName)

    # Posters
    xpaths = [
        '//div[@class="hideWhilePlaying"]/img/@src',
    ]

    # Special case: the Mia Khalifa site has a single known performer and a
    # predictable shoot-image URL derived from the same siteName script.
    if tagline == 'Mia Khalifa':
        movieActors.addActor('Mia Khalifa', '')
        shootId = detailsPageElements.xpath(
            '//script[@type="text/javascript"][contains(., "siteName")]'
        )[0].text_content().split('com/')[-1].split('/')[0].strip()
        art.append(
            'http://images.miakhalifa.com/shoots/%s/members/626x420.jpg' %
            shootId)

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            # Normalize the URL scheme before requesting the image.
            img = re.sub(r'////', 'http://', img)
            if 'http' not in img:
                img = 'http:' + img
            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                # NOTE(review): sibling scrapers in this file use
                # 'height > 1' for the poster test; this one uses
                # 'width > 1' — confirm whether that deviation is intended.
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate Plex metadata for a scene on a 'player-info' styled site.

    NOTE(review): unlike every sibling update() in this file, this signature
    omits the ``lang`` parameter — confirm how the dispatcher calls this
    site's updater before unifying the signature (inserting ``lang``
    positionally would break existing callers).
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//title')[0].text_content().strip(), siteNum)

    # Summary
    metadata.summary = detailsPageElements.xpath('//div[@class="player-info-desc"]')[0].text_content().strip()

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.studio = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//span[@class="tags"]/a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors: each performer link leads to a bio page with the photo.
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//div[@class="player-info-row"]/a'):
        actorName = actorLink.text_content().strip()
        modelURL = actorLink.xpath('.//@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        actorPageElements = HTML.ElementFromString(req.text)
        actorPhotoURL = actorPageElements.xpath('//div[@class="pornstar-bio-left"]//@src')[0]
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: poster URL is embedded in the player's inline script as
    # image: "..."
    art = []
    xpaths = [
        '//div[@id="player"]//script',
    ]
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            match = re.search(r'(?<=(image: ")).*(?=")', img.text_content())
            if match:
                img = match.group(0)
                art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate Plex metadata for a scene, with optional split-scene support.

    ``metadata.id`` is a '|'-separated blob: encoded scene URL, an unused
    field, the scene date, and — when present as a 4th field — the scene
    number of a multi-scene release ('split scene').  For split scenes the
    title is suffixed with the scene number and actors/artwork are narrowed
    to that scene's grid item.
    """
    splitScene = False
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    if len(metadata_id) > 3:
        Log('Split Scene: %d' % int(metadata_id[3]))
        splitScene = True

    # Title
    metadata.title = PAutils.parseTitle(
        detailsPageElements.xpath('//h1[@class="description"]/text()')[0],
        siteNum).strip()
    if splitScene:
        metadata.title = ("%s [Scene %s]" % (metadata.title, metadata_id[3]))

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="synopsis"]')[0].text_content().strip()

    # Studio
    try:
        studio = detailsPageElements.xpath(
            '//div[@class="studio"]/a/text()')[0].strip()
    except:
        studio = ''
    if studio:
        metadata.studio = studio

    # Tagline and Collection(s): prefer the "A scene from ..." link, then the
    # Series List header (with the 'Series:' label and '(<studio>)' suffix
    # stripped), falling back to the studio itself.
    metadata.collections.clear()
    tagline = ''
    try:
        tagline = detailsPageElements.xpath(
            '//p[contains(text(), "A scene from")]/a/text()')[0].strip()
        metadata.collections.add(tagline)
    except:
        try:
            tagline = detailsPageElements.xpath(
                '//a[@data-label="Series List"]/h2/text()')[0].strip().replace(
                    "Series:", "").replace("(%s)" % studio, "").strip()
            metadata.collections.add(tagline)
        except:
            metadata.collections.add(metadata.studio)
    Log("Tagline: %s" % tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="categories"]/a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors: for split scenes, restrict to the cast of the selected grid
    # item; otherwise take every performer listed on the page.
    movieActors.clearActors()
    actors = []
    if splitScene:
        actorNames = detailsPageElements.xpath(
            '//div[@class="item-grid item-grid-scene"]/div[@class="grid-item"][%d]/div/div[@class="scene-cast-list"]/a/text()'
            % int(metadata_id[3]))
        for name in actorNames:
            actors.append(
                detailsPageElements.xpath(
                    '//div[@class="video-performer"]/a[./img[@title="%s"]]/span/span'
                    % (name))[0])
    else:
        actors = detailsPageElements.xpath(
            '//div[@class="video-performer"]/a/span/span')

    for actorLink in actors:
        actorName = actorLink.text_content().strip()
        actorPhotoURL = detailsPageElements.xpath(
            '//div[@class="video-performer"]/a/img[@title="%s"]/@data-bgsrc'
            % (actorName))[0].strip()
        if actorName:
            movieActors.addActor(actorName, actorPhotoURL)

    # Director
    metadata.directors.clear()
    director = metadata.directors.new()
    try:
        # BUGFIX: the '/a/text()' xpath already yields plain strings; the old
        # code then called .text_content().split(':')[2] on that string,
        # which always raised inside this bare except, so the director was
        # never set.  Mirror the studio extraction above instead.
        directorName = detailsPageElements.xpath(
            '//div[@class="director"]/a/text()')[0].strip()
        if not directorName == 'Unknown':
            director.name = directorName
    except:
        pass

    # Posters: cover image, optional gallery page, and (for split scenes)
    # the scene's own grid thumbnail.
    art = []
    cover = '//div[@id="video-container-details"]/div/section/a/picture/source[1]/@data-srcset'
    scene = '//div[@class="item-grid item-grid-scene"]/div/a/img/@src'
    gallery = '//div[@id="video-container-details"]/div/section/div[2]/div[2]/a[@data-label="Gallery"]/@href'
    gallery_image = '//div[@class="item-grid item-grid-gallery"]/div[@class="grid-item"]/a/img/@data-src'
    try:
        art.append(detailsPageElements.xpath(cover)[0])
        gallery = detailsPageElements.xpath(gallery)
        if gallery:
            req = PAutils.HTTPRequest(
                '%s%s' % (PAsearchSites.getSearchBaseURL(siteNum), gallery[0]))
            galleryPageElement = HTML.ElementFromString(req.text)
            art = art + galleryPageElement.xpath(gallery_image)
        if splitScene:
            # NOTE(review): the scene number is used 1-based in the xpath
            # above but 0-based here as a list index — confirm whether this
            # picks the intended thumbnail.
            art.append(detailsPageElements.xpath(scene)[int(metadata_id[3])])
    except:
        pass

    images = []
    posterExists = False
    Log('Artwork found: %d' % len(art))
    numPosters = 20
    numArt = 5
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                # Randomly sample roughly half of the candidates to cap the
                # number of downloads (galleries can be large).  The old
                # 'and idx != 0' clause was dead code: enumerate starts at 1.
                if random.randint(0, 1) == 0:
                    continue
                image = PAutils.HTTPRequest(
                    posterUrl,
                    headers={'Referer': 'http://www.data18.empirestores.co'})
                # BUGFIX: keep the URL paired with its download.  The
                # fallback below previously re-indexed into 'art' by the
                # position within 'images', which is misaligned whenever any
                # candidate was skipped (random sampling, already-exists).
                images.append((posterUrl, image))
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection, capped at
                # numPosters portrait / numArt landscape images.
                if height > width:
                    # Item is a poster
                    if numPosters != 0:
                        numPosters = numPosters - 1
                    else:
                        continue
                    posterExists = True
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > height:
                    # Item is an art item
                    if numArt != 0:
                        numArt = numArt - 1
                    else:
                        continue
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    # Fallback: if no portrait image qualified as a poster, promote the
    # downloaded images (landscape or otherwise) so the item isn't posterless.
    if not posterExists:
        for idx, (posterUrl, image) in enumerate(images, 1):
            try:
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(
                        image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Populate Plex metadata for a FAKings scene.

    Series, categories and actresses are all read from labelled <strong>
    blocks on the scene page.  The scene's own thumbnail is recovered by
    scanning an actress's listing page for an entry linking back to this
    scene URL; artwork accumulates into the shared ``art`` list.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content().strip(), siteNum)

    # Summary
    metadata.summary = detailsPageElements.xpath('//span[@class="grisoscuro"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'FAKings'

    # Collections / Tagline
    metadata.collections.clear()
    tagline = PAutils.parseTitle(detailsPageElements.xpath('//strong[contains(., "Serie")]//following-sibling::a')[0].text_content().strip(), siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//strong[contains(., "Categori")]//following-sibling::a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Release Date (only when the search step supplied one)
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Posters: 'img' stays empty until the first matching scene thumbnail is
    # found on one of the actress listing pages below.
    img = ''

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//strong[contains(., "Actr")]//following-sibling::a'):
        actorName = actorLink.text_content().strip()
        modelURL = actorLink.xpath('.//@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        actorPageElements = HTML.ElementFromString(req.text)
        actorPhotoURL = actorPageElements.xpath('//div[@class="zona-imagen"]//img[@class]/@src')[0].strip()
        # Only scan for the scene thumbnail until one has been found.
        if not img:
            for scene in actorPageElements.xpath('//div[@class="zona-listado2"]'):
                if sceneURL == scene.xpath('.//@href')[0]:
                    img = scene.xpath('.//img[@class]/@src')[0].strip()
                    art.append(img)
                    break
        movieActors.addActor(actorName, actorPhotoURL)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate Plex metadata for a scene via the site's GraphQL API.

    ``metadata.id`` carries the encoded scene name, which is interpolated
    into the module-level ``update_query`` GraphQL template.  All fields
    come from the ``findOneVideo`` payload; artwork is the primary
    movie/poster image plus every carousel entry.
    """
    metadata_id = str(metadata.id).split('|')
    sceneName = PAutils.Decode(metadata_id[0])
    sceneURL = PAsearchSites.getSearchBaseURL(
        siteNum) + '/graphql?query=' + update_query % (
            sceneName, PAsearchSites.getSearchSiteName(siteNum).upper())
    detailsPageElements = getDatafromAPI(sceneURL)
    video = detailsPageElements['findOneVideo']
    pictureset = video['carousel']

    # Title
    metadata.title = PAutils.parseTitle(video['title'], siteNum)

    # Summary
    metadata.summary = video['description']

    # Director (first credited one only)
    if video['directors']:
        director = metadata.directors.new()
        director.name = video['directors'][0]['name']

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum).title()

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)

    # Release Date
    date_object = parse(video['releaseDate'])
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    if video['categories']:
        movieGenres.clearGenres()
        for tag in video['categories']:
            genreName = tag['name']
            movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    actors = video['models']
    for actor in actors:
        actorName = actor['name']
        actorPhotoURL = ''
        if actor['images']:
            actorPhotoURL = actor['images']['listing'][0]['highdpi']['double']
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: primary image first ('movie' preferred over 'poster'), then
    # every carousel image at the highest-DPI variant.
    art = []
    for name in ['movie', 'poster']:
        # BUGFIX: membership was previously tested against video['carousel']
        # (a list of carousel image dicts), which can never contain the
        # string 'movie'/'poster', so the primary image was never appended.
        # The lookup on the next line reads video['images'], so test that.
        if name in video['images'] and video['images'][name]:
            image = video['images'][name][-1]
            if 'highdpi' in image:
                art.append(image['highdpi']['3x'])
            else:
                art.append(image['src'])
            break

    for image in pictureset:
        img = image['listing'][0]['highdpi']['triple']
        art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1 or height > width:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100 and width > height and idx > 1:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata