Пример #1
0
def update(metadata, siteID, movieGenres, movieActors):
    """Populate Plex movie metadata from a scene's detail page.

    The first '|'-separated segment of ``metadata.id`` is the scene path;
    it is appended to the site's search URL to fetch the detail page, from
    which studio, summary, tagline/collection, genres, actors, artwork,
    release date and title are scraped.  Returns the mutated ``metadata``.
    """
    temp = str(metadata.id).split('|')[0]
    Log('temp: ' + temp)
    url = PAsearchSites.getSearchSearchURL(siteID) + temp
    Log('url:' + url)
    detailsPageElements = HTML.ElementFromURL(url)

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteID)
    Log('Studio: ' + metadata.studio)

    # Summary
    metadata.summary = detailsPageElements.xpath('//p[@class="video-description"]')[0].text_content().strip()

    # Tagline and Collection (both are just the studio name)
    tagline = metadata.studio
    metadata.collections.clear()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genre in detailsPageElements.xpath('//a[@class="video-tag"]'):
        movieGenres.addGenre(genre.text_content())

    # Actors — fetch each actor's own page for a headshot; the query
    # string is stripped from the photo URL.
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//a[contains(@class,"video-actor-link")]'):
        actorName = actorLink.text_content()
        actorPageURL = PAsearchSites.getSearchBaseURL(siteID) + actorLink.get('href')
        actorPage = HTML.ElementFromURL(actorPageURL)
        actorPhotoURL = actorPage.xpath('//img[@class="girl-details-photo"]')[0].get('src').split('?')[0]
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters/Background — drop previously stored art, then re-add.
    valid_names = list()
    metadata.posters.validate_keys(valid_names)
    metadata.art.validate_keys(valid_names)
    for posterNum, posterCur in enumerate(detailsPageElements.xpath('//div[contains(@class,"gallery-item")]'), 1):
        posterURL = posterCur.get('data-big-image')
        metadata.posters[posterURL] = Proxy.Preview(HTTP.Request(posterURL, headers={'Referer': 'http://www.google.com'}).content, sort_order=posterNum)

    backgroundURL = detailsPageElements.xpath('//img[@class="video-image"]')[0].get('src').split('?')[0]
    metadata.art[backgroundURL] = Proxy.Preview(HTTP.Request(backgroundURL, headers={'Referer': 'http://www.google.com'}).content, sort_order=1)

    # Date — the upload-date element reads "<label>: <Month> <day>, <year>".
    date = detailsPageElements.xpath('.//div[@class="video-details"]//p[@class="video-upload-date"]')[0].text_content().split(':')
    dateFixed = date[1].strip()
    Log('DateFixed: ' + dateFixed)
    date_object = datetime.strptime(dateFixed, '%B %d, %Y')
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Title — prefixed with the studio name.
    titleOfficial = detailsPageElements.xpath('//div[@class="video-rating-and-details"]//h1[@class="heading heading--2 video-title"]')[0].text_content()
    metadata.title = metadata.studio + ' - ' + titleOfficial
    Log('Title: ' + metadata.title)

    return metadata
Пример #2
0
def search(results, lang, siteNum, searchData):
    """Search the site and a Google fallback for scenes, then score them.

    Candidate URLs come from the site's own search (sid=587) and from
    Google hits rewritten into refstat.php lookups (sid=584).  Each
    candidate page is scraped for title and date, scored, and appended
    to ``results`` as a MetadataSearchResult.
    """
    searchResults = []

    url = PAsearchSites.getSearchSearchURL(
        siteNum) + searchData.encoded + '&sid=587'
    req = PAutils.HTTPRequest(url)
    siteSearchResults = HTML.ElementFromString(req.text)
    for searchResult in siteSearchResults.xpath('//div[@class="itemm"]'):
        sceneURL = PAsearchSites.getSearchBaseURL(
            siteNum) + '/tour/%s' % searchResult.xpath('.//@href')[0]

        searchResults.append(sceneURL)

    # Fallback: extract the scene ID from Google result URLs; the regex
    # takes whatever sits between a "<digit>pp/" segment and the next '/'.
    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for result in googleResults:
        pattern = re.search(r'(?<=\dpp\/).*(?=\/)', result)
        if pattern:
            sceneID = pattern.group(0)
            sceneURL = PAsearchSites.getSearchBaseURL(
                siteNum) + '/t1/refstat.php?lid=%s&sid=584' % sceneID

            # Only keep Google hits whose URL mentions 'content', and dedupe.
            if ('content' in result) and sceneURL not in searchResults:
                searchResults.append(sceneURL)

    for sceneURL in searchResults:
        req = PAutils.HTTPRequest(sceneURL)
        detailsPageElements = HTML.ElementFromString(req.text)

        # refstat.php redirects; only pages that ended up on a 'content'
        # URL are real scene pages worth parsing.
        if ('content' in req.url):
            titleNoFormatting = detailsPageElements.xpath(
                '//h2[@class="vidtitle"]')[0].text_content().strip().replace(
                    '\"', '')
            curID = PAutils.Encode(sceneURL)
            date = detailsPageElements.xpath(
                '//h3[@class="releases"]//preceding-sibling::text()')[0].strip(
                )

            if date:
                releaseDate = parse(date).strftime('%Y-%m-%d')
            else:
                # No date on the page: fall back to the user-supplied date.
                releaseDate = searchData.dateFormat(
                ) if searchData.date else ''
            displayDate = releaseDate if date else ''

            # Score by date when the user gave one and the page had a date;
            # otherwise by title similarity.
            if searchData.date and displayDate:
                score = 100 - Util.LevenshteinDistance(searchData.date,
                                                       releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(
                    searchData.title.lower(), titleNoFormatting.lower())

            results.Append(
                MetadataSearchResult(
                    id='%s|%d|%s' % (curID, siteNum, releaseDate),
                    name='%s [%s] %s' %
                    (titleNoFormatting,
                     PAsearchSites.getSearchSiteName(siteNum), displayDate),
                    score=score,
                    lang=lang))

    return results
def search(results, lang, siteNum, searchData):
    """Search the site for scenes and full movies matching the title.

    Appends a MetadataSearchResult for every scene hit, every movie hit,
    and additionally for every scene listed on a matching movie's page.
    Returns ``results``.
    """
    # Scenes by name
    req = PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded)
    searchResults = HTML.ElementFromString(req.text)
    # NOTE: the trailing space in 'scene thumbnail ' matches the site's markup.
    for searchResult in searchResults.xpath('//div[@class="scenes list"]/div[@class="items"]/div[@class="scene thumbnail "]'):
        titleNoFormatting = searchResult.xpath('.//div[@class="textual"]/a')[0].text_content().strip()
        curID = PAutils.Encode(searchResult.xpath('.//a[@class="title"]/@href')[0])

        score = 100 - Util.LevenshteinDistance(searchData.title.lower(),
                                               titleNoFormatting.lower())

        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (curID, siteNum),
                name='%s [%s]' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum)),
                score=score,
                lang=lang))

    # Movies by name
    for searchResult in searchResults.xpath('//div[@class="movies list"]/div[@class="items"]/a[@class="movie thumbnail"]'):
        titleNoFormatting = searchResult.xpath('./h2')[0].text_content().strip()
        movieLink = searchResult.xpath('./@href')[0]
        curID = PAutils.Encode(movieLink)
        score = 100 - Util.LevenshteinDistance(searchData.title.lower(),
                                               titleNoFormatting.lower())

        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (curID, siteNum),
                name='%s - Full Movie [%s]' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum)),
                score=score,
                lang=lang))

        # Also append all the scenes from matching movies
        req = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + movieLink)
        moviePageElements = HTML.ElementFromString(req.text)
        for movieScene in moviePageElements.xpath('//div[@class="scenes"]/div[@class="list"]/div[@class="scene thumbnail "]'):
            titleNoFormatting = movieScene.xpath('.//div[@class="textual"]/a')[0].text_content().strip()
            # (fixed: was a redundant `curID = curID = ...` double assignment)
            curID = PAutils.Encode(movieScene.xpath('.//a[@class="title"]/@href')[0])

            score = 100 - Util.LevenshteinDistance(searchData.title.lower(),
                                                   titleNoFormatting.lower())

            results.Append(
                MetadataSearchResult(
                    id='%s|%d' % (curID, siteNum),
                    name='%s [%s]' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum)),
                    score=score,
                    lang=lang))

    return results
def search(results, lang, siteNum, searchData):
    """Look a scene up directly when the query starts with a numeric ID,
    otherwise fall back to the site's free-text search page."""
    firstToken = searchData.title.split(' ', 1)[0]
    if unicode(firstToken, 'UTF-8').isdigit():
        sceneID = firstToken
        searchData.title = searchData.title.replace(sceneID, '', 1).strip()
    else:
        sceneID = None

    if sceneID:
        # Direct lookup: /post/details/<id> — an ID match is a perfect score.
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + '/post/details/' + sceneID
        req = PAutils.HTTPRequest(sceneURL)
        detailsPageElements = HTML.ElementFromString(req.text)

        titleNoFormatting = detailsPageElements.xpath('//div[contains(@class, "mediaHeader")]//span[contains(@class, "title")]')[0].text_content().strip()
        studio = detailsPageElements.xpath('//span[contains(@class, "type")]')[0].text_content().split('|')[0].strip()

        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (PAutils.Encode(sceneURL), siteNum),
                name='%s [%s]' % (titleNoFormatting, studio),
                score=100,
                lang=lang))
        return results

    # Free-text search over the site's post listing.
    searchData.encoded = searchData.title.replace(' ', '+')
    req = PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded)
    listingPage = HTML.ElementFromString(req.text)
    for post in listingPage.xpath('//div[contains(@class, "post")]'):
        titleNoFormatting = post.xpath('.//span[contains(@class, "title")]')[0].text_content().strip()
        sceneURL = post.xpath('.//a[contains(@class, "media")]/@href')[0]
        studio = post.xpath('.//span[contains(@class, "source")]')[0].text_content().strip()
        sceneCover = PAutils.Encode(post.xpath('.//a[contains(@class, "media")]//img[contains(@class, "image")]/@src')[0])
        releaseDate = searchData.dateFormat() if searchData.date else ''
        curID = PAutils.Encode(sceneURL)

        # Base score starts at 90 so a studio/site-name match can reach 100.
        score = 90 - Util.LevenshteinDistance(searchData.title.lower(),
                                              titleNoFormatting.lower())
        if studio.lower() == PAsearchSites.getSearchSiteName(siteNum).lower():
            score += 10

        results.Append(
            MetadataSearchResult(
                id='%s|%d|%s|%s' % (curID, siteNum, releaseDate, sceneCover),
                name='%s [%s]' % (titleNoFormatting, studio),
                score=score,
                lang=lang))

    return results
Пример #5
0
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Search for a scene directly by numeric ID or via the site search.

    If the query is only a number, the scene page is fetched directly
    (perfect score); otherwise the search results list is scraped and
    each card is scored against the title or the supplied date.
    """
    # A leading all-digit token is treated as a scene ID.
    sceneID = searchTitle.split(' ', 1)[0]
    if unicode(sceneID, 'UTF-8').isdigit():
        searchTitle = searchTitle.replace(sceneID, '', 1).strip()
    else:
        sceneID = None

    if sceneID and not searchTitle:
        # Direct lookup; the 'sst' cookie forces the English UI.
        req = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) +
                                  '/' + sceneID,
                                  cookies={'sst': 'ulang-en'})
        if req.ok:
            detailsPageElements = HTML.ElementFromString(req.text)
            titleNoFormatting = detailsPageElements.xpath(
                '//h1[@class="detail__title"]')[0].text_content()
            curID = PAutils.Encode(
                PAsearchSites.getSearchBaseURL(siteNum) + '/' + sceneID)

            releaseDate = ''
            date = detailsPageElements.xpath(
                '//span[@class="detail__date"]')[0].text_content().strip()
            if date:
                releaseDate = parse(date).strftime('%Y-%m-%d')

            # An exact ID match is assumed correct.
            score = 100

            results.Append(
                MetadataSearchResult(id='%s|%d' % (curID, siteNum),
                                     name='[%s] %s %s' %
                                     (PAsearchSites.getSearchSiteName(siteNum),
                                      titleNoFormatting, releaseDate),
                                     score=score,
                                     lang=lang))
    else:
        encodedTitle = searchTitle.replace(' ', '+')
        req = PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) +
                                  encodedTitle,
                                  cookies={'sst': 'ulang-en'})
        searchResults = HTML.ElementFromString(req.text)
        for searchResult in searchResults.xpath(
                '//ul[@class="cards-list"]//li'):
            titleNoFormatting = searchResult.xpath(
                './/div[@class="card__footer"]//div[@class="card__h"]/text()'
            )[0]
            curID = PAutils.Encode(searchResult.xpath('.//a/@href')[0])
            releaseDate = parse(
                searchResult.xpath('.//div[@class="card__date"]')
                [0].text_content().strip()).strftime('%Y-%m-%d')

            # Score by date when provided, otherwise by title similarity.
            if searchDate:
                score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(
                    searchTitle.lower(), titleNoFormatting.lower())

            results.Append(
                MetadataSearchResult(
                    id='%s|%d' % (curID, siteNum),
                    name='%s [%s] %s' %
                    (titleNoFormatting,
                     PAsearchSites.getSearchSiteName(siteNum), releaseDate),
                    score=score,
                    lang=lang))

    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill Plex metadata for a Naughty America scene via its Algolia index.

    The scene ID is the first '|'-separated segment of ``metadata.id``.
    Scene data comes from the 'nacms_scenes_production' Algolia index;
    performer headshots and artwork are scraped from the public site.
    Returns the mutated ``metadata``.
    """
    metadata_id = str(metadata.id).split('|')
    sceneID = metadata_id[0]

    url = PAsearchSites.getSearchSearchURL(
        siteNum
    ) + '?x-algolia-application-id=I6P9Q9R18E&x-algolia-api-key=08396b1791d619478a55687b4deb48b4'
    detailsPageElements = getAlgolia(url, 'nacms_scenes_production',
                                     'filters=id=' + sceneID)[0]

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    metadata.summary = detailsPageElements['synopsis']

    # Studio
    metadata.studio = 'Naughty America'

    # Tagline and Collection(s) — network name plus the specific sub-site.
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)
    metadata.collections.add(detailsPageElements['site'])

    # Release Date — 'published_at' is a unix timestamp.
    date_object = datetime.fromtimestamp(detailsPageElements['published_at'])
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres — the API calls them 'fantasies'.
    movieGenres.clearGenres()
    for genreName in detailsPageElements['fantasies']:
        movieGenres.addGenre(genreName)

    # Actors — headshot scraped from each performer page when present.
    movieActors.clearActors()
    for actorName in detailsPageElements['performers']:
        actorPhotoURL = ''

        actorsPageURL = 'https://www.naughtyamerica.com/pornstar/' + actorName.lower(
        ).replace(' ', '-').replace("'", '')
        req = PAutils.HTTPRequest(actorsPageURL)
        actorsPageElements = HTML.ElementFromString(req.text)
        img = actorsPageElements.xpath('//img[@class="performer-pic"]/@src')
        if img:
            actorPhotoURL = 'https:' + img[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters — collect gallery image URLs, normalized to the images1 host.
    art = []

    req = PAutils.HTTPRequest('https://www.naughtyamerica.com/scene/0' +
                              sceneID)
    scenePageElements = HTML.ElementFromString(req.text)
    for photo in scenePageElements.xpath(
            '//div[contains(@class, "contain-scene-images") and contains(@class, "desktop-only")]/a/@href'
    ):
        img = 'https:' + re.sub(
            r'images\d+', 'images1', photo, 1, flags=re.IGNORECASE)
        art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except Exception:
                # Best effort: skip images that fail to download or decode.
                # (was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit)
                pass

    return metadata
Пример #7
0
def search(results, lang, siteNum, searchData):
    """Find matching releases via a direct URL slug plus Google results.

    The title is slugified into a site URL (optionally preceded by a
    numeric shoot ID); /movies/ pages from a Google site-search are
    added as extra candidates.  Each candidate page's embedded JSON is
    parsed for title, sub-site and date, then scored.
    """
    directURL = searchData.title.replace(' ', '-').lower()
    if '/' not in directURL:
        directURL = directURL.replace('-', '/', 1)

    # A leading all-digit segment is a shoot ID; otherwise restore the dash.
    shootID = directURL.split('/', 2)[0]
    if not unicode(shootID, 'UTF-8').isdigit():
        shootID = None
        directURL = directURL.replace('/', '-', 1)
    else:
        directURL = directURL.split('/')[1]

    directURL = PAsearchSites.getSearchSearchURL(siteNum) + directURL
    searchResultsURLs = [directURL]

    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)

    for sceneURL in googleResults:
        sceneURL = sceneURL.rsplit('?', 1)[0]
        if sceneURL not in searchResultsURLs:
            if ('/movies/' in sceneURL):
                searchResultsURLs.append(sceneURL)

    for sceneURL in searchResultsURLs:
        detailsPageElements = getJSONfromPage(sceneURL)

        if detailsPageElements:
            # The JSON carries either a movies or a videos payload.
            contentName = None
            for name in ['moviesContent', 'videosContent']:
                if name in detailsPageElements and detailsPageElements[name]:
                    contentName = name
                    break

            if contentName:
                detailsPageElements = detailsPageElements[contentName]
                # NOTE(review): dict.keys()[0] only works on Python 2; the
                # payload presumably has a single entry keyed by its ID.
                curID = detailsPageElements.keys()[0]
                detailsPageElements = detailsPageElements[curID]
                titleNoFormatting = detailsPageElements['title']
                if 'site' in detailsPageElements:
                    subSite = detailsPageElements['site']['name']
                else:
                    subSite = PAsearchSites.getSearchSiteName(siteNum)

                if 'publishedDate' in detailsPageElements:
                    releaseDate = parse(
                        detailsPageElements['publishedDate']).strftime(
                            '%Y-%m-%d')
                else:
                    # No date in the JSON: fall back to the user's date.
                    releaseDate = searchData.dateFormat(
                    ) if searchData.date else ''
                displayDate = releaseDate if 'publishedDate' in detailsPageElements else ''

                # Score by date when available, else by title similarity.
                if searchData.date and displayDate:
                    score = 100 - Util.LevenshteinDistance(
                        searchData.date, releaseDate)
                else:
                    score = 100 - Util.LevenshteinDistance(
                        searchData.title.lower(), titleNoFormatting.lower())

                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s|%s' %
                        (curID, siteNum, releaseDate, contentName),
                        name='%s [Mylf/%s] %s' %
                        (titleNoFormatting, subSite, displayDate),
                        score=score,
                        lang=lang))

    return results
def update(metadata, siteID, movieGenres, movieActors):
    """Fill Plex metadata for a New Sensations scene or full DVD.

    The first '|'-separated segment of ``metadata.id`` is the detail-page
    URL with '+' standing for '/' and '?' for '!'.  Pages whose URL
    contains "dvds" are treated as full DVDs, everything else as a single
    scene; the two page layouts are scraped separately.  Returns the
    mutated ``metadata``.
    """
    Log('******UPDATE CALLED*******')

    url = str(metadata.id).split("|")[0].replace('+', '/').replace('?', '!')
    if "dvds" in url:
        sceneType = "DVD"
        Log("Is DVD")
    else:
        sceneType = "Scene"
        Log("Is Scene")
    detailsPageElements = HTML.ElementFromURL(url)
    art = []
    metadata.collections.clear()
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Studio
    metadata.studio = 'New Sensations'

    if sceneType == "Scene":
        Log("SceneUpdate")
        # Title
        metadata.title = detailsPageElements.xpath(
            '//div[@class="trailerVideos clear"]/div[1]')[0].text_content(
            ).strip()

        # Summary
        metadata.summary = detailsPageElements.xpath(
            '//div[@class="trailerInfo"]/p')[0].text_content().strip()

        # Tagline and Collection(s): DVD name plus site name
        dvdName = detailsPageElements.xpath(
            '//div[@class="trailerInfo"]/ul/li[4]')[0].text_content().strip()
        metadata.tagline = dvdName
        metadata.collections.add(dvdName)
        siteName = PAsearchSites.getSearchSiteName(siteID).strip()
        metadata.collections.add(siteName)

        # Genres
        for genreLink in detailsPageElements.xpath(
                '//div[@class="trailerInfo"]/ul/li[3]/a'):
            movieGenres.addGenre(genreLink.text_content().strip().lower())

        # Release Date — passed through from search in metadata.id.
        try:
            date = str(metadata.id).split("|")[2]
            if len(date) > 0:
                metadata.originally_available_at = parse(date)
                metadata.year = metadata.originally_available_at.year
                Log("Date from file")
        except Exception:
            # The ID may not carry a date segment.
            pass

        # Actors — cast size also drives group-sex genre tags.
        actors = detailsPageElements.xpath(
            '//div[@class="trailerInfo"]/ul/li[1]/span/a')
        if len(actors) == 3:
            movieGenres.addGenre("Threesome")
        if len(actors) == 4:
            movieGenres.addGenre("Foursome")
        if len(actors) > 4:
            movieGenres.addGenre("Orgy")
        for actorLink in actors:
            actorName = str(actorLink.text_content().strip())
            try:
                actorPage = HTML.ElementFromURL(actorLink.get("href"))
                actorPhotoURL = actorPage.xpath(
                    '//div[@class="modelPicture"]/div/img')[0].get("src0_3x")
            except Exception:
                actorPhotoURL = ''
            movieActors.addActor(actorName, actorPhotoURL)

        ### Posters and artwork ###

        # Video trailer background image
        j = 1
        try:
            twitterBG = detailsPageElements.xpath(
                '//div[@class="trailerArea"]/a[1]')[0].get('href')
            twitterBG = PAsearchSites.getSearchSearchURL(siteID) + twitterBG
            metadata.art[twitterBG] = Proxy.Preview(
                HTTP.Request(twitterBG,
                             headers={'Referer': 'http://www.google.com'}).content,
                sort_order=j)
            j += 1
        except Exception:
            pass

        # DVD cover scraped from the linked DVD page (src or lazy data-src).
        posterNum = 1
        try:
            dvdPageLink = detailsPageElements.xpath(
                '//div[@class="trailerInfo"]/ul/li[4]/a')[0].get('href')
            dvdPageElements = HTML.ElementFromURL(dvdPageLink)
            dvdPosterURL = dvdPageElements.xpath(
                '//div[@class="dvdcover"]//img')[0].get("src")
            if dvdPosterURL is None:
                dvdPosterURL = dvdPageElements.xpath(
                    '//div[@class="dvdcover"]//img')[0].get("data-src")
            metadata.posters[dvdPosterURL] = Proxy.Preview(
                HTTP.Request(dvdPosterURL,
                             headers={'Referer': 'http://www.google.com'}).content,
                sort_order=posterNum)
            posterNum += 1
        except Exception:
            Log("DVD Cover not found")

    else:
        Log("DVDUpdate")
        # Title
        title = detailsPageElements.xpath(
            '//div[@class="dvdSections clear"]/div[1]')[0].text_content(
            ).replace("DVDS /", "").strip()
        metadata.title = title

        # Summary
        metadata.summary = detailsPageElements.xpath(
            '//div[@class="dDetails"]/p')[0].text_content().strip()

        # Tagline and Collection(s): DVD name plus site name
        dvdName = title
        metadata.tagline = dvdName
        metadata.collections.add(dvdName)
        siteName = PAsearchSites.getSearchSiteName(siteID).strip()
        metadata.collections.add(siteName)

        # Genres
        for genreLink in detailsPageElements.xpath(
                '//div[@class="dvdDetails clear"]/ul/li[2]/a'):
            movieGenres.addGenre(genreLink.text_content().strip().lower())

        # Release Date — the page uses either ISO or US short date format.
        date = detailsPageElements.xpath(
            '//div[@class="dvdDetails clear"]/ul/li[1]')[0].text_content(
            ).replace('Released:', '').strip()
        if len(date) > 0:
            try:
                date_object = datetime.strptime(date, '%Y-%m-%d')
            except ValueError:
                date_object = datetime.strptime(date, '%m/%d/%y')
            metadata.originally_available_at = date_object
            metadata.year = metadata.originally_available_at.year

        # Actors — prefer linked model pages; on failure fall back to the
        # comma-separated "Featuring:" text.
        try:
            for actorLink in detailsPageElements.xpath(
                    '//span[@class="tour_update_models"]/a'):
                actorName = str(actorLink.text_content().strip())
                try:
                    actorPage = HTML.ElementFromURL(actorLink.get("href"))
                    actorPhotoURL = actorPage.xpath(
                        '//div[@class="modelPicture"]/div/img')[0].get(
                            "src0_3x")
                except Exception:
                    actorPhotoURL = ''
                movieActors.addActor(actorName, actorPhotoURL)
        except Exception:
            actorsList = detailsPageElements.xpath(
                '//div[@class="dvdDetails clear"]/div[2]/p')[0].text_content(
                ).split('Featuring:')[1]
            for actorName in actorsList.split(","):
                movieActors.addActor(str(actorName.strip()), '')

        ### Posters and artwork ###

        # DVD Cover (src or lazy data-src)
        posterNum = 1
        try:
            dvdPosterURL = detailsPageElements.xpath(
                '//div[@class="dvdcover"]//img')[0].get("src")
            if dvdPosterURL is None:
                dvdPosterURL = detailsPageElements.xpath(
                    '//div[@class="dvdcover"]//img')[0].get("data-src")
            metadata.posters[dvdPosterURL] = Proxy.Preview(
                HTTP.Request(dvdPosterURL,
                             headers={'Referer': 'http://www.google.com'}).content,
                sort_order=posterNum)
            posterNum += 1
        except Exception:
            Log("DVD Cover not found")

    return metadata
def search(results, encodedTitle, title, searchTitle, siteNum, lang,
           searchByDateActor, searchDate, searchSiteID):
    """Query the network's JSON releases API across all content types.

    Searches scene, movie, serie and trailer releases either by a
    numeric ID prefixed to the query or by title, scores each result
    (trailers are penalized), and appends MetadataSearchResults.
    """
    if searchSiteID != 9999:
        siteNum = searchSiteID

    # The API requires an Instance token obtained from the site's cookies.
    cookies = get_Cookies(PAsearchSites.getSearchBaseURL(siteNum))
    headers = {
        'Instance': cookies['instance_token'],
    }

    # A leading all-digit token is treated as a scene ID.
    sceneID = None
    splited = searchTitle.split(' ')
    if unicode(splited[0], 'utf8').isdigit():
        sceneID = splited[0]
        searchTitle = searchTitle.replace(sceneID, '', 1).strip()

    for sceneType in ['scene', 'movie', 'serie', 'trailer']:
        if sceneID and not searchTitle:
            url = PAsearchSites.getSearchSearchURL(
                siteNum) + '/v2/releases?type=%s&id=%s' % (sceneType, sceneID)
        else:
            url = PAsearchSites.getSearchSearchURL(
                siteNum) + '/v2/releases?type=%s&search=%s' % (sceneType,
                                                               encodedTitle)

        # NOTE(review): urllib.Request/urlopen — looks like Python 2's
        # urllib2 aliased as urllib; confirm against the file's imports.
        data = None
        req = urllib.Request(url, headers=headers)
        try:
            data = urllib.urlopen(req).read()
        except Exception as e:
            Log(e)
            pass

        if data:
            searchResults = json.loads(data)
            for searchResult in searchResults['result']:
                titleNoFormatting = searchResult['title']
                releaseDate = parse(
                    searchResult['dateReleased']).strftime('%Y-%m-%d')
                curID = searchResult['id']
                siteName = searchResult['brand'].title()
                subSite = ''
                if 'collections' in searchResult and searchResult[
                        'collections']:
                    subSite = searchResult['collections'][0]['name']
                siteDisplay = '%s/%s' % (siteName,
                                         subSite) if subSite else siteName

                # Prefer ID match, then date match, then title similarity.
                if sceneID:
                    score = 100 - Util.LevenshteinDistance(sceneID, curID)
                elif searchDate:
                    score = 100 - Util.LevenshteinDistance(
                        searchDate, releaseDate)
                else:
                    score = 100 - Util.LevenshteinDistance(
                        searchTitle.lower(), titleNoFormatting.lower())

                # Trailers are labeled and ranked below full releases.
                if sceneType == 'trailer':
                    titleNoFormatting = '[%s] %s' % (sceneType.capitalize(),
                                                     titleNoFormatting)
                    score = score - 10

                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s' % (curID, siteNum, sceneType),
                        name='%s [%s] %s' %
                        (titleNoFormatting, siteDisplay, releaseDate),
                        score=score,
                        lang=lang))

    return results
Пример #10
0
def _fetchSearchPage(url, headers):
    """Fetch and parse one search page.

    Falls back to a raw urllib request over TLSv1 when the framework fetch
    fails (some WowGirls hosts reject the default client/ciphers).
    """
    try:
        return HTML.ElementFromURL(url)
    except:
        request = urllib.Request(url, headers=headers)
        response = urllib.urlopen(request,
                                  context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
        return fromstring(response.read())


def _appendWowResults(results, page, siteNum, searchTitle, searchDate, lang,
                      articleXpath, linkXpath, titleFromAttr, siteLabel):
    """Scrape one parsed results page and append scored search results.

    articleXpath selects one node per scene; linkXpath (relative) selects the
    scene link inside it.  titleFromAttr: True reads the link's 'title'
    attribute (wowgirls.xxx markup), False reads the link text (wowgirls.tv
    markup).  siteLabel is shown in the result name.
    """
    for searchResult in page.xpath(articleXpath):
        link = searchResult.xpath(linkXpath)[0]
        if titleFromAttr:
            titleNoFormatting = link.get('title').strip()
        else:
            titleNoFormatting = link.text_content().strip()
        # Slashes and question marks are not usable inside a Plex metadata
        # id, so the scene URL is encoded with '_' and '!'.
        curID = link.get('href').replace('/', '_').replace('?', '!')
        score = 100 - Util.LevenshteinDistance(searchTitle.lower(),
                                               titleNoFormatting.lower())
        if searchDate:
            releaseDate = parse(searchDate).strftime('%Y-%m-%d')
        else:
            releaseDate = ''
        results.Append(
            MetadataSearchResult(
                id=curID + "|" + str(siteNum) + "|" + releaseDate,
                name=titleNoFormatting + " [" + siteLabel + "] ",
                score=score,
                lang=lang))


def search(results, encodedTitle, title, searchTitle, siteNum, lang,
           searchByDateActor, searchDate, searchSiteID):
    """Search for a scene on the WowGirls mirrors.

    Tries an exact quoted-title search on wowgirls.xxx first, then on
    wowgirls.tv, and finally falls back to an unquoted search on both sites.
    Appends MetadataSearchResult entries to *results* and returns it.
    """
    if searchSiteID != 9999:
        siteNum = searchSiteID

    # %22 quotes wrap the title for an exact-phrase search on both mirrors.
    urlWowXXX = PAsearchSites.getSearchSearchURL(
        siteNum) + '%22' + encodedTitle + '%22'
    urlWowTV = 'https://www.wowgirls.tv/?s=%22' + encodedTitle + '%22'
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
    }

    searchResultsWowXXX = _fetchSearchPage(urlWowXXX, headers)

    # wowgirls.xxx reports the hit count in a dedicated span, e.g. "12 videos".
    hitCount = searchResultsWowXXX.xpath(
        '//span[@class="search-video-number"]')[0].text_content().split(
            ' ', 1)[0]
    Log('Search-Video-Number: ' + hitCount)
    if int(hitCount) > 0:
        Log('Title Found on wowgirls.xxx')
        _appendWowResults(results, searchResultsWowXXX, siteNum, searchTitle,
                          searchDate, lang,
                          '//div[@class="videos-list"]/article', './/a', True,
                          'WowGirls.xxx')
    else:
        Log('Title not found on wowgirls.xxx, trying wowgirls.tv')
        searchResultsWowTV = _fetchSearchPage(urlWowTV, headers)

        # wowgirls.tv only renders an <h1> ("nothing found") on empty results.
        if len(searchResultsWowTV.xpath('//h1')) == 0:
            Log('Title found on wowgirls.tv')
            _appendWowResults(results, searchResultsWowTV, siteNum,
                              searchTitle, searchDate, lang,
                              '//div[@class="entry clearfix latest"]',
                              './/h3/a', False, 'WowGirls.tv')
        else:
            Log('No exact Title found, trying normal Search on wowgirls.xxx and .tv'
                )
            # Unquoted fallback search on both mirrors.
            urlWowXXX = PAsearchSites.getSearchSearchURL(
                siteNum) + encodedTitle
            urlWowTV = 'https://www.wowgirls.tv/?s=' + encodedTitle

            _appendWowResults(results, _fetchSearchPage(urlWowXXX, headers),
                              siteNum, searchTitle, searchDate, lang,
                              '//div[@class="videos-list"]/article', './/a',
                              True, 'WowGirls.xxx')
            _appendWowResults(results, _fetchSearchPage(urlWowTV, headers),
                              siteNum, searchTitle, searchDate, lang,
                              '//div[@class="entry clearfix latest"]',
                              './/h3/a', False, 'WowGirls.tv')

    return results
Пример #11
0
def update(metadata, siteID, movieGenres, movieActors):
    """Populate metadata for a scene or full movie via the Gamma/Algolia API.

    metadata.id format (built by the matching search()):
    sceneID|siteNum|sceneType|sceneDate.
    """
    Log('******UPDATE CALLED*******')

    metadata_id = str(metadata.id).split('|')
    sceneID = int(metadata_id[0])
    sceneType = metadata_id[2]
    # Scenes and full movies are filtered on different Algolia index fields.
    sceneIDName = 'clip_id' if sceneType == 'scenes' else 'movie_id'
    sceneDate = metadata_id[3]

    apiKEY = getAPIKey(PAsearchSites.getSearchBaseURL(siteID))

    url = PAsearchSites.getSearchSearchURL(
        siteID
    ) + '?x-algolia-application-id=TSMKFA364Q&x-algolia-api-key=' + apiKEY
    data = getAlgolia(url, 'all_' + sceneType,
                      'filters=%s=%d' % (sceneIDName, sceneID),
                      PAsearchSites.getSearchBaseURL(siteID))
    detailsPageElements = data['results'][0]['hits'][0]

    # Studio
    metadata.studio = detailsPageElements['studio_name']

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    metadata.summary = detailsPageElements['description'].replace(
        '</br>', '\n')

    # Release Date (carried in the id, not re-fetched from the API)
    date_object = parse(sceneDate)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Tagline and Collection(s)
    metadata.collections.clear()
    for collectionName in ['studio_name', 'serie_name']:
        if collectionName in detailsPageElements:
            metadata.collections.add(detailsPageElements[collectionName])
    # Titles like "Series: Part 2" or "Series #3" look like scenes of a
    # parent movie; add the movie title as a collection too.
    if ':' in detailsPageElements['title'] or '#' in detailsPageElements[
            'title']:
        if 'movie_title' in detailsPageElements:
            metadata.collections.add(detailsPageElements['movie_title'])

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements['categories']:
        genreName = genreLink['name']
        if genreName:
            movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements['actors']:
        actorName = actorLink['name']

        # %s formatting handles both int and str actor ids (the original
        # string concatenation raised TypeError on int ids).
        data = getAlgolia(url, 'all_actors',
                          'filters=actor_id=%s' % actorLink['actor_id'],
                          PAsearchSites.getSearchBaseURL(siteID))
        actorData = data['results'][0]['hits'][0]
        if actorData['pictures']:
            # Picture keys appear to sort by resolution; take the last
            # (highest) one -- TODO confirm key naming on the API side.
            max_quality = sorted(actorData['pictures'].keys())[-1]
            actorPhotoURL = 'https://images-fame.gammacdn.com/actors' + actorData[
                'pictures'][max_quality]
        else:
            actorPhotoURL = ''

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []

    if not PAsearchSites.getSearchBaseURL(siteID).endswith(
        ('girlsway.com', 'puretaboo.com')):
        art.append(
            'https://images-fame.gammacdn.com/movies/{0}/{0}_{1}_front_400x625.jpg'
            .format(detailsPageElements['movie_id'],
                    detailsPageElements['url_title'].lower().replace('-',
                                                                     '_')))

    if 'pictures' in detailsPageElements:
        keys = [
            key for key in detailsPageElements['pictures'].keys()
            if key[0].isdigit()
        ]
        max_quality = sorted(keys)[-1]
        art.append('https://images-fame.gammacdn.com/movies/' +
                   detailsPageElements['pictures'][max_quality])

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            try:
                # Fetch once and reuse the bytes for both dimension analysis
                # and the proxy items (the original downloaded each image
                # twice and never closed the analysis connection).
                imageContent = HTTP.Request(posterUrl,
                                            headers={
                                                'Referer':
                                                'http://www.google.com'
                                            }).content
                width, height = Image.open(StringIO(imageContent)).size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(imageContent,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(imageContent,
                                                          sort_order=idx)
            except:
                # Best-effort artwork: a broken image must not fail the update.
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate scene metadata from the site's JSON API scene endpoint.

    metadata.id carries the scene id as its first '|'-separated field.
    """
    sceneID = str(metadata.id).split('|')[0]

    sceneURL = PAsearchSites.getSearchSearchURL(siteNum) + '/scenes/' + sceneID
    sceneData = getDataFromAPI(sceneURL)['data']

    # Title
    metadata.title = sceneData['title']

    # Summary
    metadata.summary = sceneData['description']

    # Studio, Tagline and Collection(s)
    metadata.collections.clear()
    siteInfo = sceneData.get('site')
    if siteInfo:
        studioName = siteInfo['name']
        collectionNames = [studioName]

        networkID = siteInfo['network_id']
        # When the site belongs to a larger network, prefer the network name
        # as the studio and collect under both names.
        if networkID and siteInfo['id'] != networkID:
            networkURL = PAsearchSites.getSearchSearchURL(
                siteNum) + '/sites/%d' % networkID
            networkReq = getDataFromAPI(networkURL)
            if networkReq and 'data' in networkReq and networkReq['data']:
                studioName = networkReq['data']['name']
                collectionNames.append(studioName)

        metadata.tagline = studioName
        metadata.studio = studioName

        for collectionName in collectionNames:
            metadata.collections.add(collectionName)

    # Release Date
    if sceneData['date']:
        metadata.originally_available_at = parse(sceneData['date'])
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for tag in sceneData.get('tags', []):
        movieGenres.addGenre(tag['name'])

    # Actors
    movieActors.clearActors()
    for performer in sceneData['performers']:
        # Prefer the canonical (parent) performer name over per-site aliases.
        performerName = performer['name']
        if 'parent' in performer and performer[
                'parent'] and 'name' in performer['parent']:
            performerName = performer['parent']['name']

        movieActors.addActor(performerName, performer['image'])

    # Posters / background art
    artworkUrls = [
        sceneData['posters']['large'],
        sceneData['background']['large'],
    ]

    Log('Artwork found: %d' % len(artworkUrls))
    for idx, artworkUrl in enumerate(artworkUrls, 1):
        if PAsearchSites.posterAlreadyExists(artworkUrl, metadata):
            continue
        # Download image file for analysis; artwork is best-effort.
        try:
            response = PAutils.HTTPRequest(artworkUrl)
            width, height = Image.open(StringIO(response.content)).size
            # Anything with real dimensions is usable as a poster...
            if width > 1:
                metadata.posters[artworkUrl] = Proxy.Media(response.content,
                                                           sort_order=idx)
            # ...and wide images double as background art.
            if width > 100 and width > height:
                metadata.art[artworkUrl] = Proxy.Media(response.content,
                                                       sort_order=idx)
        except:
            pass

    return metadata
Пример #13
0
def search(results, encodedTitle, title, searchTitle, siteNum, lang,
           searchByDateActor, searchDate, searchSiteID):
    """Search a Gamma-network site for scenes and DVDs.

    Depending on the per-network flags configured below, scrapes up to three
    sources: the scene search results (plus follow-up result pages), a
    direct-match details page (Evil Angel only), and the DVD search results.
    Appends MetadataSearchResult entries whose id encodes the scene/DVD URL
    ('/'->'_', '?'->'!') plus the site number, and returns *results*.
    """

    # Per-network scraping switches and URL-fragment separators; defaults
    # here, overridden per siteNum in the chain below.
    networkscene = True  # scrape the scene search results
    networkscenepages = True  # follow additional scene result pages
    networkdvd = True  # scrape the DVD search results
    directmatch = False  # fetch a details page directly (Evil Angel)
    network_sep_scene_prev = ""  # URL fragment before the encoded title
    network_sep_scene = ""  # URL fragment after the encoded title
    network_sep_scene_pages_prev = ""
    network_sep_scene_pages = "/"
    network_sep_scene_pages_next = ""
    network_sep_dvd_prev = ""
    network_sep_dvd = "/1/dvd"

    if searchSiteID != 9999:
        siteNum = searchSiteID
    # Map the site number onto its network name and URL layout.
    if siteNum == 278 or (siteNum >= 285 and siteNum <= 287):
        network = 'XEmpire'
        network_sep_scene_prev = "scene/"
        network_sep_scene_pages_prev = "scene/"
        network_sep_dvd_prev = "dvd/"
        network_sep_dvd = "/1"
    elif siteNum == 329 or (siteNum >= 351 and siteNum <= 354):
        network = 'Blowpass'
        networkdvd = False
    elif siteNum == 331 or (siteNum >= 355
                            and siteNum <= 360) or siteNum == 750:
        network = 'Fantasy Massage'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    elif (siteNum >= 365
          and siteNum <= 372) or siteNum == 466 or siteNum == 692:
        network = '21Sextury'
        networkdvd = False
    elif siteNum == 183 or (siteNum >= 373 and siteNum <= 374):
        network = '21Naturals'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    elif siteNum == 53 or (siteNum >= 375 and siteNum <= 379):
        network = 'Girlsway'
        networkdvd = False
    elif siteNum >= 383 and siteNum <= 386:
        network = 'Fame Digital'
        if siteNum == 383:
            networkdvd = False
            network_sep_scene = "/scene"
            network_sep_scene_pages = "/scene/"
            network_sep_dvd = "/dvd"
        if siteNum == 386:
            # Site 386 has no scrapeable search at all.
            networkscene = False
            networkscenepages = False
            networkdvd = False
    elif siteNum >= 387 and siteNum <= 392:
        network = 'Open Life Network'
        networkdvd = False
    elif siteNum == 281:
        network = 'Pure Taboo'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    elif siteNum == 380:
        network = 'Girlfriends Films'
        network_sep_scene = "?query=&pscenes=0&tab=scenes"
        network_sep_scene_pages = "?query=&pscenes="
        network_sep_scene_pages_next = "&tab=scenes"
        network_sep_dvd = "&tab=movies"
    elif siteNum == 381:
        network = 'Burning Angel'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    elif siteNum == 277:
        # Evil Angel: only the direct-match details page is scraped.
        network = 'Evil Angel'
        networkscene = False
        networkscenepages = False
        networkdvd = False
        directmatch = True
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
        network_sep_dvd = "/dvd"
    elif siteNum == 382:
        network = 'Pretty Dirty'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"
    elif siteNum >= 460 and siteNum <= 465:
        network = '21Sextreme'
        networkdvd = False
        network_sep_scene = "/scene"
        network_sep_scene_pages = "/scene/"

    # Only show the network prefix in result names when it differs from the
    # site's own name.
    if network == PAsearchSites.getSearchSiteName(siteNum):
        network = ''
    else:
        network = network + "/"

    if networkscene:
        # Result to check
        resultfirst = []
        # Result next page
        resultsecond = []

        #searchResults = HTML.ElementFromURL(PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + "?query=" + encodedTitle)
        encodedTitle = encodedTitle.replace("%27", "").replace(
            "%3F", "").replace("%2C",
                               "")  #Remove troublesome punctuation (, . ?)
        searchResults = HTML.ElementFromURL(
            PAsearchSites.getSearchSearchURL(siteNum) +
            network_sep_scene_prev + encodedTitle + network_sep_scene)
        # One tlcDetails div per scene result on page 1.
        for searchResult in searchResults.xpath('//div[@class="tlcDetails"]'):
            titleNoFormatting = searchResult.xpath(
                './/a[1]')[0].text_content().strip()
            titleNoFormatting = titleNoFormatting.replace("BONUS-", "BONUS - ")
            titleNoFormatting = titleNoFormatting.replace("BTS-", "BTS - ")

            # Scene URL encoded as a Plex-safe metadata id.
            curID = searchResult.xpath('.//a[1]')[0].get('href').replace(
                '/', '_').replace('?', '!')
            resultfirst.append(curID)

            # Collect actor names to disambiguate the result title; BONUS/BTS
            # clips list every actor except the site owners.
            try:
                actorLink = searchResult.xpath('.//div[@class="tlcActors"]/a')
                actor = ' - '
                if "BONUS" in titleNoFormatting or "BTS" in titleNoFormatting:
                    for actorText in actorLink:
                        actorName = str(actorText.text_content().strip())
                        if "Rocco Siffredi" not in actorName and "Peter North" not in actorName:
                            actor = actor + actorName + ", "
                else:
                    actor = actor + str(actorLink[0].text_content().strip())
                actor = actor.strip()
                actor = actor.strip(",")
                actor = " " + actor
            except:
                actor = ''

            # Release date: from the result tile, else from the scene's own
            # page, else empty.
            try:
                releaseDate = parse(
                    searchResult.xpath(
                        './/div[@class="tlcSpecs"]/span[@class="tlcSpecsDate"]/span[@class="tlcDetailsValue"]'
                    )[0].text_content().strip()).strftime('%Y-%m-%d')
            except:
                try:
                    detailsPageElements = HTML.ElementFromURL(
                        PAsearchSites.getSearchBaseURL(siteNum) +
                        searchResult.xpath('.//a[1]')[0].get('href'))
                    releaseDate = parse(
                        detailsPageElements.xpath('//*[@class="updatedDate"]')
                        [0].text_content().strip()).strftime('%Y-%m-%d')
                except:
                    releaseDate = ''
            # Score by date when both sides have one, else by title distance.
            if searchDate and releaseDate:
                score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(
                    searchTitle.lower(), titleNoFormatting.lower())

            results.Append(
                MetadataSearchResult(
                    id=curID + "|" + str(siteNum),
                    name=titleNoFormatting + actor + " [" + network +
                    PAsearchSites.getSearchSiteName(siteNum) + "] " +
                    releaseDate,
                    score=score,
                    lang=lang))

        if networkscenepages:
            # Other pages
            # Walk follow-up result pages; i is bumped to 100 as a sentinel
            # to stop when a page is empty or repeats the previous page.
            i = 2
            while i < 3:
                pagenum = i
                if siteNum == 380:
                    # Girlfriends Films pages are zero-based.
                    pagenum = i - 1
                searchResultsSec = HTML.ElementFromURL(
                    PAsearchSites.getSearchSearchURL(siteNum) +
                    network_sep_scene_pages_prev + encodedTitle +
                    network_sep_scene_pages + str(pagenum) +
                    network_sep_scene_pages_next)
                i += 1
                searchResultSec = searchResultsSec.xpath(
                    '//div[@class="tlcDetails"]')
                if searchResultSec:
                    # If this page's first result already appeared on the
                    # previous page, the site is repeating itself -- stop
                    # after processing this page.
                    titleText = searchResultSec[0].xpath('.//a[1]')[0]
                    resultSEARCH = titleText.get('href').replace('/',
                                                                 '_').replace(
                                                                     '?', '!')

                    for resultCheck in resultfirst:
                        if resultCheck == resultSEARCH:
                            i = 100
                            break

                    # Same per-result scraping as page 1.
                    for searchResultSec in searchResultsSec.xpath(
                            '//div[@class="tlcDetails"]'):
                        titleText = searchResultSec.xpath('.//a[1]')[0]
                        titleNoFormatting = titleText.text_content().strip()
                        titleNoFormatting = titleNoFormatting.replace(
                            "BONUS-", "BONUS - ")
                        titleNoFormatting = titleNoFormatting.replace(
                            "BTS-", "BTS - ")

                        curID = titleText.get('href').replace('/',
                                                              '_').replace(
                                                                  '?', '!')
                        resultsecond.append(curID)

                        try:
                            actorLink = searchResultSec.xpath(
                                './/div[@class="tlcActors"]/a')
                            actor = ' - '
                            if "BONUS" in titleNoFormatting or "BTS" in titleNoFormatting:
                                for actorText in actorLink:
                                    actorName = str(
                                        actorText.text_content().strip())
                                    if "Rocco Siffredi" not in actorName and "Peter North" not in actorName:
                                        actor = actor + actorName + ", "
                            else:
                                actor = actor + str(
                                    actorLink[0].text_content().strip())
                            actor = actor.strip()
                            actor = actor.strip(",")
                            actor = " " + actor
                        except:
                            actor = ''

                        try:
                            releaseDate = parse(
                                searchResultSec.xpath(
                                    './/div[@class="tlcSpecs"]/span[@class="tlcSpecsDate"]/span[@class="tlcDetailsValue"]'
                                )[0].text_content().strip()).strftime(
                                    '%Y-%m-%d')
                        except:
                            try:
                                detailsPageElements = HTML.ElementFromURL(
                                    PAsearchSites.getSearchBaseURL(siteNum) +
                                    searchResultSec.xpath('.//a[1]')[0].get(
                                        'href'))
                                releaseDate = parse(
                                    detailsPageElements.xpath(
                                        '//*[@class="updatedDate"]')
                                    [0].text_content().strip()).strftime(
                                        '%Y-%m-%d')
                            except:
                                releaseDate = ''

                        if searchDate and releaseDate:
                            score = 100 - Util.LevenshteinDistance(
                                searchDate, releaseDate)
                        else:
                            score = 100 - Util.LevenshteinDistance(
                                searchTitle.lower(), titleNoFormatting.lower())

                        results.Append(
                            MetadataSearchResult(
                                id=curID + "|" + str(siteNum),
                                name=titleNoFormatting + actor + " [" +
                                network +
                                PAsearchSites.getSearchSiteName(siteNum) +
                                "] " + releaseDate,
                                score=score,
                                lang=lang))

                    # Shift: this page's ids become the "previous page" set.
                    resultfirst = resultsecond
                    resultsecond = []
                else:
                    # Empty page -- stop paging.
                    i = 100
    # Evil Angel: guess the scene URL slug from the title and scrape the
    # details page directly instead of a results list.
    if directmatch:
        # Result to check
        resultfirst = []
        searchString = encodedTitle.replace("%20", '-').lower()
        #searchResults = HTML.ElementFromURL(PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + "?query=" + encodedTitle)
        searchResults = HTML.ElementFromURL(
            PAsearchSites.getSearchSearchURL(siteNum) + searchString)
        for searchResult in searchResults.xpath('//div[@id="infoWrapper"]'):
            titleNoFormatting = searchResult.xpath(
                './/h1[1]')[0].text_content().strip()
            titleNoFormatting = titleNoFormatting.replace("BONUS-", "BONUS - ")
            titleNoFormatting = titleNoFormatting.replace("BTS-", "BTS - ")

            # The guessed URL itself becomes the metadata id.
            curID = (PAsearchSites.getSearchSearchURL(siteNum) +
                     searchString).replace('/', '_').replace('?', '!')
            resultfirst.append(curID)
            Log(curID + titleNoFormatting + "FOUND")

            # try:
            #     actorLink = searchResult.xpath('.//div[@class="tlcActors"]/a')
            #     actor = ' - '
            #     if "BONUS" in titleNoFormatting or "BTS" in titleNoFormatting:
            #         for actorText in actorLink:
            #             actorName = str(actorText.text_content().strip())
            #             if "Rocco Siffredi" not in actorName and "Peter North" not in actorName:
            #                 actor = actor + actorName + ", "
            #     else:
            #         actor = actor + str(actorLink[0].text_content().strip())
            #     actor = actor.strip()
            #     actor = actor.strip(",")
            #     actor = " " + actor
            # except:
            #     actor = ''

            # NOTE(review): .strftime() is called on a plain string here, so
            # this always raises and releaseDate falls through to '' -- the
            # parse() call is probably missing; confirm before relying on it.
            try:
                releaseDate = (searchResult.xpath('//li[@class="updatedDate"]')
                               [0].text_content().strip()).strftime('%Y-%m-%d')


#            except:
#                try:
#                    detailsPageElements = HTML.ElementFromURL(PAsearchSites.getSearchBaseURL(siteNum) + searchResult.xpath('.//a[1]')[0].get('href'))
#                    releaseDate = parse(detailsPageElements.xpath('//*[@class="updatedDate"]')[0].text_content().strip()).strftime('%Y-%m-%d')
            except:
                releaseDate = ''
            Log(releaseDate + "FOUND")
            if searchDate and releaseDate:
                score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(
                    searchTitle.lower(), titleNoFormatting.lower())

            results.Append(
                MetadataSearchResult(id=curID + "|" + str(siteNum),
                                     name=titleNoFormatting + " [" + network +
                                     PAsearchSites.getSearchSiteName(siteNum) +
                                     "] " + releaseDate,
                                     score=score,
                                     lang=lang))

    # DVD (full movie) search; any scraping failure is silently skipped.
    if networkdvd:
        try:
            dvdResults = HTML.ElementFromURL(
                PAsearchSites.getSearchSearchURL(siteNum) +
                network_sep_dvd_prev + encodedTitle + network_sep_dvd)
            for dvdResult in dvdResults.xpath(
                    '//div[contains(@class,"tlcItem playlistable_dvds")] | //div[@class="tlcDetails"]'
            ):
                titleNoFormatting = dvdResult.xpath(
                    './/div[@class="tlcTitle"]/a')[0].get('title').strip()
                curID = dvdResult.xpath('.//a')[0].get('href').replace(
                    '/', '_').replace('?', '!')
                # releaseDate stays a datetime here (only the year is shown);
                # an empty-string fallback makes .strftime below raise, which
                # the outer try swallows.
                try:
                    releaseDate = parse(
                        dvdResult.xpath(
                            './/div[@class="tlcSpecs"]/span[@class="tlcSpecsDate"]/span[@class="tlcDetailsValue"]'
                        )[0].text_content().strip())
                except:
                    try:
                        detailsPageElements = HTML.ElementFromURL(
                            PAsearchSites.getSearchBaseURL(siteNum) +
                            dvdResult.xpath('.//a[1]')[0].get('href'))
                        releaseDate = parse(
                            detailsPageElements.xpath(
                                '//*[@class="updatedDate"]')
                            [0].text_content().strip())
                    except:
                        releaseDate = ''

                score = 100 - Util.LevenshteinDistance(
                    searchTitle.lower(), titleNoFormatting.lower())

                results.Append(
                    MetadataSearchResult(
                        id=curID + "|" + str(siteNum),
                        name=titleNoFormatting + " (" +
                        releaseDate.strftime('%Y') + ") - Full Movie [" +
                        PAsearchSites.getSearchSiteName(siteNum) + "]",
                        score=score,
                        lang=lang))
        except:
            pass

    return results
def search(results, lang, siteNum, search):
    """Plex search handler (data18-style site layout).

    Gathers candidate scene URLs from three sources: a direct
    /content/<id> URL when the query leads with a numeric scene ID, the
    site's own search page, and a Google site-search fallback.  Each
    candidate is scored and appended to `results`.

    Scoring: 100 for an exact scene-ID match; otherwise 80 minus the
    Levenshtein distance between dates (when both are known) or titles.
    Hits landing exactly on 80 are buffered in `temp` and demoted to 79
    when more than one ties, so ambiguous matches never auto-select.

    NOTE(review): assumes the Plex Python 2 runtime (`unicode` builtin,
    `search['title']` arriving as a byte string) — verify against the
    hosting framework.
    """
    searchResults = []  # direct + Google candidate URLs, fetched individually below
    siteResults = []  # URLs already covered by the site's own search page
    temp = []  # results that scored exactly 80 (ambiguous ties)
    count = 0  # how many results scored exactly 80

    # A leading numeric token is treated as a scene ID; only IDs > 100 are
    # trusted enough to build a direct /content/<id> URL from.
    sceneID = None
    splited = search['title'].split()
    if unicode(splited[0], 'UTF-8').isdigit():
        sceneID = splited[0]

        if int(sceneID) > 100:
            search['title'] = search['title'].replace(sceneID, '', 1).strip()
            sceneURL = '%s/content/%s' % (
                PAsearchSites.getSearchBaseURL(siteNum), sceneID)
            searchResults.append(sceneURL)

    # Query the site's own search page.
    search['encoded'] = search['title'].replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum),
                          search['encoded'])
    req = PAutils.HTTPRequest(searchURL,
                              headers={'Referer': 'http://www.data18.com'})
    searchPageElements = HTML.ElementFromString(req.text)

    # Each hit is the parent <div> of a <p class="genmed"> block.
    for searchResult in searchPageElements.xpath(
            '//p[@class="genmed"]//parent::div'):
        sceneURL = searchResult.xpath(
            './/*[contains(@href, "content")]/@href')[0]

        if sceneURL not in searchResults:
            # Trailing path segment of the URL doubles as the scene ID.
            urlID = re.sub(r'.*/', '', sceneURL)

            # Site label: prefer "Network:", then "Studio:", else empty.
            try:
                siteName = searchResult.xpath(
                    './/*[contains(., "Network")]')[0].text_content().replace(
                        'Network:', '').strip()
            except:
                try:
                    siteName = searchResult.xpath('.//*[contains(., "Studio")]'
                                                  )[0].text_content().replace(
                                                      'Studio:', '').strip()
                except:
                    siteName = ''

            # Optional sub-site, displayed as "Network/Site".
            try:
                subSite = searchResult.xpath(
                    './/p[@class][contains(., "Site:")]')[0].text_content(
                    ).replace('Site:', '').strip()
            except:
                subSite = ''

            if siteName:
                siteDisplay = '%s/%s' % (siteName,
                                         subSite) if subSite else siteName
            else:
                siteDisplay = subSite

            titleNoFormatting = PAutils.parseTitle(
                searchResult.xpath('.//*[contains(@href, "content")]')
                [1].text_content(), siteNum)
            curID = PAutils.Encode(sceneURL)
            siteResults.append(sceneURL)

            # First p.genmed holds "#<id> <date>"; strip the ID prefix.
            try:
                date = searchResult.xpath(
                    './/p[@class="genmed"]')[0].text_content().strip()
                date = re.sub(r'^#(.*?)\s', '', date)
            except:
                date = ''

            if date and not date == 'unknown':
                # dateutil does not accept the 'Sept' abbreviation.
                date = date.replace('Sept', 'Sep')
                releaseDate = parse(date).strftime('%Y-%m-%d')
            else:
                # Fall back to the date supplied with the search request.
                releaseDate = parse(search['date']).strftime(
                    '%Y-%m-%d') if search['date'] else ''
            # Only show a date in the result label when the page itself had one.
            displayDate = releaseDate if date else ''

            if sceneID == urlID:
                score = 100
            elif search['date'] and displayDate:
                score = 80 - Util.LevenshteinDistance(search['date'],
                                                      releaseDate)
            else:
                score = 80 - Util.LevenshteinDistance(
                    search['title'].lower(), titleNoFormatting.lower())

            # Exactly 80 means "plausible but unconfirmed": buffer for the
            # tie-demotion pass at the end instead of appending directly.
            if score == 80:
                count += 1
                temp.append(
                    MetadataSearchResult(
                        id='%s|%d|%s' % (curID, siteNum, releaseDate),
                        name='%s [%s] %s' %
                        (titleNoFormatting, siteDisplay, displayDate),
                        score=score,
                        lang=lang))
            else:
                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s' % (curID, siteNum, releaseDate),
                        name='%s [%s] %s' %
                        (titleNoFormatting, siteDisplay, displayDate),
                        score=score,
                        lang=lang))

    # Google fallback: pick up /content/ URLs the site search missed.
    googleResults = PAutils.getFromGoogleSearch(search['title'], siteNum)
    for sceneURL in googleResults:
        if ('/content/' in sceneURL and '.html' not in sceneURL
                and sceneURL not in searchResults
                and sceneURL not in siteResults):
            searchResults.append(sceneURL)

    # Fetch each remaining candidate URL and score it from its detail page
    # (same label/date/score logic as the search-page loop above).
    for sceneURL in searchResults:
        req = PAutils.HTTPRequest(sceneURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', sceneURL)

        try:
            siteName = detailsPageElements.xpath(
                '//i[contains(., "Network")]//preceding-sibling::a[1]'
            )[0].text_content().strip()
        except:
            try:
                siteName = detailsPageElements.xpath(
                    '//i[contains(., "Studio")]//preceding-sibling::a[1]'
                )[0].text_content().strip()
            except:
                siteName = ''

        try:
            subSite = detailsPageElements.xpath(
                '//i[contains(., "Site")]//preceding-sibling::a[1]'
            )[0].text_content().strip()
        except:
            subSite = ''

        if siteName:
            siteDisplay = '%s/%s' % (siteName,
                                     subSite) if subSite else siteName
        else:
            siteDisplay = subSite

        titleNoFormatting = PAutils.parseTitle(
            detailsPageElements.xpath('//h1')[0].text_content(), siteNum)
        curID = PAutils.Encode(sceneURL)

        # Date sits after the last ':' in a "...date: <value>" span.
        try:
            date = detailsPageElements.xpath(
                '//span[@class][./*[contains(.., "date")]]')[0].text_content(
                ).split(':', 2)[-1].strip()
        except:
            date = ''

        if date and not date == 'unknown':
            releaseDate = parse(date).strftime('%Y-%m-%d')
        else:
            releaseDate = parse(
                search['date']).strftime('%Y-%m-%d') if search['date'] else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif search['date'] and displayDate:
            score = 80 - Util.LevenshteinDistance(search['date'], releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(search['title'].lower(),
                                                  titleNoFormatting.lower())

        if score == 80:
            count += 1
            temp.append(
                MetadataSearchResult(
                    id='%s|%d|%s' % (curID, siteNum, releaseDate),
                    name='%s [%s] %s' %
                    (titleNoFormatting, siteDisplay, displayDate),
                    score=score,
                    lang=lang))
        else:
            results.Append(
                MetadataSearchResult(
                    id='%s|%d|%s' % (curID, siteNum, releaseDate),
                    name='%s [%s] %s' %
                    (titleNoFormatting, siteDisplay, displayDate),
                    score=score,
                    lang=lang))

    # Tie-demotion: a lone 80 keeps its score, but multiple 80s are all
    # demoted to 79 so Plex will not auto-select among ambiguous matches.
    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=79,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=result.score,
                                     lang=lang))

    return results
Пример #15
0
def update(metadata, siteID, movieGenres, movieActors):
    """Plex update handler for Girlfriends Films content.

    Two branches, selected by the '|'-packed ``metadata.id``:

    * Algolia branch — id carries [sceneID, siteID, sceneType, date]; scene
      or movie details come from the ``girlfriendsfilms_*`` Algolia indices
      via ``getAlgolia()``.
    * DVD branch — id[0] is an encoded site URL ('_'/'!' stand in for
      '/'/'?'); details are scraped from the HTML page.

    Populates studio, title, summary, release date, collections, genres,
    actors, and poster/background artwork on ``metadata``.
    """
    Log('******UPDATE CALLED*******')

    metadata_id = str(metadata.id).split('|')
    sceneType = metadata_id[2] if len(metadata_id) > 2 else None

    if sceneType:
        sceneID = int(metadata_id[0])
        sceneIDName = 'clip_id' if sceneType == 'scenes' else 'movie_id'
        sceneDate = metadata_id[3]
        apiKEY = getAPIKey(PAsearchSites.getSearchBaseURL(siteID))
        urlParams = '?x-algolia-application-id=TSMKFA364Q&x-algolia-api-key=' + apiKEY

        # Exact lookup of this scene/movie by its numeric ID.
        url = PAsearchSites.getSearchSearchURL(siteID).replace(
            '*', 'girlfriendsfilms_' + sceneType, 1) + urlParams
        data = getAlgolia(url, 'filters=%s=%d' % (sceneIDName, sceneID),
                          PAsearchSites.getSearchBaseURL(siteID))
        detailsPageElements = data['hits'][0]

        # All sibling scenes that share this title's url_title slug.
        url = PAsearchSites.getSearchSearchURL(siteID).replace(
            '*', 'girlfriendsfilms_scenes', 1) + urlParams
        data = getAlgolia(url, 'query=%s' % detailsPageElements['url_title'],
                          PAsearchSites.getSearchBaseURL(siteID))['hits']
        data = sorted(data, key=lambda i: i['clip_id'])
        # BUGFIX: materialize the enumeration.  A bare enumerate() iterator
        # was exhausted by whichever loop touched it first (Title for
        # scenes, Genres for movies), leaving the Posters fallback loop
        # below with nothing to iterate.
        scenesPagesElements = list(enumerate(data, 1))

        # Studio
        metadata.studio = detailsPageElements['studio_name']

        # Title: for individual scenes, append the scene's position within
        # its parent movie ("Title, Scene N").
        if sceneType == 'scenes':
            for idx, scene in scenesPagesElements:
                if scene['clip_id'] == sceneID:
                    metadata.title = '%s, Scene %d' % (
                        detailsPageElements['title'], idx)
        if not metadata.title:
            metadata.title = detailsPageElements['title']

        # Summary — skip boilerplate re-release descriptions.
        description = detailsPageElements['description']
        if not description.startswith('Previously released on'):
            metadata.summary = description

        # Release Date (from the packed metadata id, not the API record)
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

        # Tagline and Collection(s)
        metadata.collections.clear()
        for collectionName in ['network_name', 'serie_name', 'movie_title']:
            if collectionName in detailsPageElements:
                metadata.collections.add(detailsPageElements[collectionName])

        # Genres
        movieGenres.clearGenres()
        genres = detailsPageElements['categories']
        for genreLink in genres:
            genreName = genreLink['name']
            movieGenres.addGenre(genreName)

        # For a movie, merge in the categories of every contained scene.
        if sceneType == 'movies':
            for idx, scene in scenesPagesElements:
                for genreLink in scene['categories']:
                    movieGenres.addGenre(genreLink['name'])

        # Actors — one extra Algolia lookup per actor for the headshot.
        movieActors.clearActors()
        actors = detailsPageElements['actors']
        for actorLink in actors:
            actorName = actorLink['name']

            url = PAsearchSites.getSearchSearchURL(siteID).replace(
                '*', 'girlfriendsfilms_actors', 1) + urlParams
            data = getAlgolia(url, 'filters=actor_id=' + actorLink['actor_id'],
                              PAsearchSites.getSearchBaseURL(siteID))
            actorData = data['hits'][0]
            if actorData['pictures']:
                # NOTE(review): size keys sort lexicographically — assumed
                # the largest key is the best quality; verify key format.
                max_quality = sorted(actorData['pictures'].keys())[-1]
                actorPhotoURL = 'https://images-fame.gammacdn.com/actors' + actorData[
                    'pictures'][max_quality]
            else:
                actorPhotoURL = ''

            movieActors.addActor(actorName, actorPhotoURL)

        # Posters: front cover derived from the movie id and slug.
        art = [
            'https://images-fame.gammacdn.com/movies/{0}/{0}_{1}_front_400x625.jpg'
            .format(detailsPageElements['movie_id'],
                    detailsPageElements['url_title'].lower().replace('-', '_'))
        ]

        if 'pictures' in detailsPageElements:
            sizes = sorted(detailsPageElements['pictures'].keys())
            # Third-largest size preferred; fall back to the largest when
            # fewer than three sizes exist (previously an IndexError).
            max_quality = sizes[-3] if len(sizes) >= 3 else sizes[-1]
            art.append('https://images-fame.gammacdn.com/movies/' +
                       detailsPageElements['pictures'][max_quality])
        else:
            for idx, scene in scenesPagesElements:
                sizes = sorted(scene['pictures'].keys())
                max_quality = sizes[-3] if len(sizes) >= 3 else sizes[-1]
                art.append('https://images-fame.gammacdn.com/movies/' +
                           scene['pictures'][max_quality])
    else:
        # DVD branch: decode the site URL packed into metadata.id.
        sceneURL = metadata_id[0].replace('_', '/').replace('!', '?')
        data = urllib.urlopen(sceneURL).read()
        detailsPageElements = HTML.ElementFromString(data)

        # Studio
        metadata.studio = detailsPageElements.xpath(
            '//div[@class="studio"]//a/text()')[0]

        # Title
        metadata.title = detailsPageElements.xpath(
            '//h1[@class="description"]/text()')[0]

        # Summary
        metadata.summary = detailsPageElements.xpath(
            '//div[@class="synopsis"]')[0].text_content().strip()

        # Release Date
        date = detailsPageElements.xpath(
            '//div[@class="release-date"]/text()')[0].strip()
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

        # Tagline and Collection(s)
        metadata.collections.clear()
        metadata.collections.add(metadata.studio)

        # Genres
        movieGenres.clearGenres()
        genres = detailsPageElements.xpath('//div[@class="categories"]//a')
        for genreLink in genres:
            genreName = genreLink.xpath('./text()')[0]
            movieGenres.addGenre(genreName)

        # Actors — drop the site's "image not available" placeholder.
        movieActors.clearActors()
        actors = detailsPageElements.xpath(
            '//div[@class="video-performer"]//img')
        for actorLink in actors:
            actorName = actorLink.xpath('./@title')[0]
            actorPhotoURL = actorLink.xpath('./@data-bgsrc')[0]
            if 'image-not-available-performer-female' in actorPhotoURL:
                actorPhotoURL = ''

            movieActors.addActor(actorName, actorPhotoURL)

        # Posters: page image plus any inline `img = "..."` gallery URLs.
        art = [detailsPageElements.xpath('//picture//img/@src')[-1]]

        images = re.findall(r'img = \"(.*?)\";', data)
        for image in images:
            if image not in art:
                art.append(image)

    # Download each artwork candidate and classify by dimensions:
    # any real image becomes a poster, wide images also become art.
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                img_file = urllib.urlopen(posterUrl)
                im = StringIO(img_file.read())
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(HTTP.Request(
                        posterUrl,
                        headers={
                            'Referer': 'http://www.google.com'
                        }).content,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(HTTP.Request(
                        posterUrl,
                        headers={
                            'Referer': 'http://www.google.com'
                        }).content,
                                                          sort_order=idx)
            except:
                # Best-effort: a failed image download must not abort the update.
                pass

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Plex update handler for a Gamma "releases" JSON API site.

    ``metadata.id`` packs sceneID|siteID|sceneType; release details come
    from the site's /v2/releases endpoint, authenticated with the
    ``instance_token`` cookie.  Populates studio, title, summary, release
    date, collections, genres, actors, and poster/background artwork.
    """
    Log('******UPDATE CALLED*******')

    metadata_id = str(metadata.id).split('|')
    sceneID = metadata_id[0]
    sceneType = metadata_id[2]

    # The API requires the per-session instance token from the site cookies.
    cookies = get_Cookies(PAsearchSites.getSearchBaseURL(siteID))
    headers = {
        'Instance': cookies['instance_token'],
    }
    url = PAsearchSites.getSearchSearchURL(
        siteID) + '/v2/releases?type=%s&id=%s' % (sceneType, sceneID)
    # NOTE(review): stdlib urllib has no Request class (that is urllib2 /
    # urllib.request) — presumably `urllib` is aliased at import time;
    # verify against the file's import block.
    req = urllib.Request(url, headers=headers)
    data = urllib.urlopen(req).read()
    detailsPageElements = json.loads(data)['result'][0]

    # Studio
    metadata.studio = detailsPageElements['brand'].title()

    # Title
    metadata.title = detailsPageElements['title']

    # Summary: own description first, else the parent release's description.
    description = None
    if 'description' in detailsPageElements:
        description = detailsPageElements['description']
    elif 'parent' in detailsPageElements:
        if 'description' in detailsPageElements['parent']:
            description = detailsPageElements['parent']['description']

    if description:
        metadata.summary = description

    # Release Date
    date_object = parse(detailsPageElements['dateReleased'])
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Tagline and Collection(s): API collections plus the parent title.
    metadata.collections.clear()
    seriesNames = []

    if 'collections' in detailsPageElements and detailsPageElements[
            'collections']:
        for collection in detailsPageElements['collections']:
            seriesNames.append(collection['name'])
    if 'parent' in detailsPageElements:
        if 'title' in detailsPageElements['parent']:
            seriesNames.append(detailsPageElements['parent']['title'])

    # Ensure the site's own name leads the collection list unless one of
    # the series names already matches it (compared case/space/quote-free).
    isInCollection = False
    siteName = PAsearchSites.getSearchSiteName(siteID).lower().replace(
        ' ', '').replace('\'', '')
    for seriesName in seriesNames:
        if seriesName.lower().replace(' ', '').replace('\'', '') == siteName:
            isInCollection = True
            break

    if not isInCollection:
        seriesNames.insert(0, PAsearchSites.getSearchSiteName(siteID))

    for seriesName in seriesNames:
        metadata.collections.add(seriesName)

    # Genres
    movieGenres.clearGenres()
    genres = detailsPageElements['tags']
    for genreLink in genres:
        genreName = genreLink['name']
        movieGenres.addGenre(genreName)

    # Actors — one extra API call per actor for name and headshot.
    movieActors.clearActors()
    actors = detailsPageElements['actors']
    for actorLink in actors:
        actorPageURL = PAsearchSites.getSearchSearchURL(
            siteID) + '/v1/actors?id=%d' % actorLink['id']

        req = urllib.Request(actorPageURL, headers=headers)
        data = urllib.urlopen(req).read()
        actorData = json.loads(data)['result'][0]

        actorName = actorData['name']
        actorPhotoURL = ''
        if actorData['images'] and actorData['images']['profile']:
            actorPhotoURL = actorData['images']['profile'][0]['xs']['url']

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: collect the 'xx'-size URL of every poster and cover image.
    art = []
    for imageType in ['poster', 'cover']:
        if imageType in detailsPageElements['images']:
            for image in detailsPageElements['images'][imageType]:
                art.append(image['xx']['url'])

    # Download each artwork candidate and classify by dimensions:
    # any real image becomes a poster, wide images also become art.
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                img_file = urllib.urlopen(posterUrl)
                im = StringIO(img_file.read())
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(HTTP.Request(
                        posterUrl,
                        headers={
                            'Referer': 'http://www.google.com'
                        }).content,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(HTTP.Request(
                        posterUrl,
                        headers={
                            'Referer': 'http://www.google.com'
                        }).content,
                                                          sort_order=idx)
            except:
                # Best-effort: a failed image download must not abort the update.
                pass

    return metadata
Пример #17
0
def search(results, encodedTitle, title, searchTitle, siteNum, lang,
           searchByDateActor, searchDate, searchSiteID):
    """Plex search handler for Girlfriends Films.

    Queries the ``girlfriendsfilms_scenes`` and ``girlfriendsfilms_movies``
    Algolia indices, then additionally scrapes the site's own DVD search
    page.  A leading numeric token in the query is treated as a scene or
    movie ID and scored by ID distance instead of title distance.
    """
    if searchSiteID != 9999:
        siteNum = searchSiteID

    # Leading numeric token -> explicit scene/movie ID.
    sceneID = searchTitle.split(' ', 1)[0]
    if unicode(sceneID, 'utf8').isdigit():
        searchTitle = searchTitle.replace(sceneID, '', 1).strip()
    else:
        sceneID = None

    apiKEY = getAPIKey(PAsearchSites.getSearchBaseURL(siteNum))
    for sceneType in ['scenes', 'movies']:
        url = PAsearchSites.getSearchSearchURL(siteNum).replace(
            '*', 'girlfriendsfilms_' + sceneType, 1
        ) + '?x-algolia-application-id=TSMKFA364Q&x-algolia-api-key=' + apiKEY
        data = getAlgolia(url, 'query=' + searchTitle,
                          PAsearchSites.getSearchBaseURL(siteNum))

        searchResults = data['hits']
        for searchResult in searchResults:
            if sceneType == 'scenes':
                releaseDate = parse(searchResult['release_date'])

                # Scene titles are disambiguated with the female cast list.
                actors = []
                for actorLink in searchResult['female_actors']:
                    actors.append(actorLink['name'])
                sceneData = ', '.join(actors)
                curID = searchResult['clip_id']
                titleNoFormatting = '%s %s' % (searchResult['title'],
                                               sceneData)
            else:
                # Movies lack a release_date; prefer last_modified.
                date = 'last_modified' if searchResult[
                    'last_modified'] else 'date_created'
                releaseDate = parse(searchResult[date])
                curID = searchResult['movie_id']
                titleNoFormatting = searchResult['title']

            # Re-releases carry the original date in the description.
            description = searchResult['description']
            if description.startswith('Previously released on'):
                date = description.split('.', 1)[0].replace(
                    'Previously released on', '', 1).strip()
                releaseDate = parse(date)

            # Never report a date later than the file's own date.
            if searchDate:
                date = parse(searchDate)
                if date.year < releaseDate.year:
                    releaseDate = date

            releaseDate = releaseDate.strftime('%Y-%m-%d')

            if sceneID:
                # BUGFIX: curID is an int (clip_id/movie_id, formatted with
                # %d below); LevenshteinDistance expects strings, so compare
                # against its string form.
                score = 100 - Util.LevenshteinDistance(sceneID, str(curID))
            else:
                score = 100 - Util.LevenshteinDistance(
                    searchTitle.lower(), titleNoFormatting.lower())

            results.Append(
                MetadataSearchResult(
                    id='%d|%d|%s|%s' %
                    (curID, siteNum, sceneType, releaseDate),
                    name='[%s] %s %s' %
                    (sceneType.capitalize(), titleNoFormatting, releaseDate),
                    score=score,
                    lang=lang))

    # Fallback: the site's own DVD search (HTML grid layout).
    searchResults = HTML.ElementFromURL(
        'https://www.girlfriendsfilms.net/Search?media=2&q=' + encodedTitle)
    for searchResult in searchResults.xpath('//div[@class="grid-item"]'):
        titleNoFormatting = searchResult.xpath(
            './/span[@class="overlay-inner"]//text()')[0]
        sceneURL = 'https://www.girlfriendsfilms.net' + searchResult.xpath(
            './/a/@href')[0]
        # Encode the URL into the id field ('_'/'!' stand in for '/'/'?').
        curID = sceneURL.replace('/', '_').replace('?', '!')
        score = 100 - Util.LevenshteinDistance(searchTitle.lower(),
                                               titleNoFormatting.lower())

        results.Append(
            MetadataSearchResult(id='%s|%d' % (curID, siteNum),
                                 name='[DVD] %s' % (titleNoFormatting),
                                 score=score,
                                 lang=lang))

    return results
def search(results, encodedTitle, title, searchTitle, siteNum, lang,
           searchByDateActor, searchDate, searchSiteID):
    """Plex search handler for Gamma-network sites (XEmpire, Blowpass, ...).

    Maps the site number to its parent network name (shown in the result
    label), scrapes the site search page for scenes, then tries the /dvd
    search for full movies.  Scene results score by date distance when a
    release date is known, otherwise by title distance.
    """
    if searchSiteID != 9999:
        siteNum = searchSiteID

    # BUGFIX: initialize so an unmapped siteNum no longer raises
    # NameError at the comparison below.
    network = ''
    if siteNum == 278 or (siteNum >= 285 and siteNum <= 287):
        network = 'XEmpire'
    elif siteNum == 329 or (siteNum >= 351 and siteNum <= 354):
        network = 'Blowpass'
    elif siteNum == 331 or (siteNum >= 355 and siteNum <= 360):
        network = 'Fantasy Massage'
    elif siteNum == 330 or siteNum == 332 or (siteNum >= 361
                                              and siteNum <= 364):
        network = 'Mile High Network'
    elif (siteNum >= 365 and siteNum <= 372) or siteNum == 466:
        network = '21Sextury'
    elif siteNum == 183 or (siteNum >= 373 and siteNum <= 374):
        network = '21Naturals'
    elif siteNum == 53 or (siteNum >= 375 and siteNum <= 379):
        network = 'Girlsway'
    elif siteNum >= 383 and siteNum <= 386:
        network = 'Fame Digital'
    elif siteNum >= 387 and siteNum <= 392:
        network = 'Open Life Network'
    elif siteNum == 281:
        network = 'Pure Taboo'
    elif siteNum == 380:
        network = 'Girlfriends Films'
    elif siteNum == 381:
        network = 'Burning Angel'
    elif siteNum == 277:
        network = 'Evil Angel'
    elif siteNum == 382:
        network = 'Pretty Dirty'
    elif siteNum >= 460 and siteNum <= 466:
        network = '21Sextreme'

    # Hide the network prefix when it is unknown or equals the site's
    # own name; otherwise display results as "Network/Site".
    if not network or network == PAsearchSites.getSearchSiteName(siteNum):
        network = ''
    else:
        network = network + "/"

    # Scene results from the site's search page.
    searchResults = HTML.ElementFromURL(
        PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + "?query=" +
        encodedTitle)
    for searchResult in searchResults.xpath('//div[@class="tlcDetails"]'):
        titleNoFormatting = searchResult.xpath(
            './/a[1]')[0].text_content().strip()
        # Encode the scene URL into the id field ('_'/'!' for '/'/'?').
        curID = searchResult.xpath('.//a[1]')[0].get('href').replace(
            '/', '_').replace('?', '!')
        # Release date: search-page spec block first, then the detail page.
        try:
            releaseDate = parse(
                searchResult.xpath(
                    './/div[@class="tlcSpecs"]/span[@class="tlcSpecsDate"]/span[@class="tlcDetailsValue"]'
                )[0].text_content().strip()).strftime('%Y-%m-%d')
        except:
            try:
                detailsPageElements = HTML.ElementFromURL(
                    PAsearchSites.getSearchBaseURL(siteNum) +
                    searchResult.xpath('.//a[1]')[0].get('href'))
                releaseDate = parse(
                    detailsPageElements.xpath('//*[@class="updatedDate"]')
                    [0].text_content().strip()).strftime('%Y-%m-%d')
            except:
                releaseDate = ''
        if searchDate and releaseDate:
            score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
        else:
            score = 100 - Util.LevenshteinDistance(searchTitle.lower(),
                                                   titleNoFormatting.lower())

        results.Append(
            MetadataSearchResult(id=curID + "|" + str(siteNum),
                                 name=titleNoFormatting + " [" + network +
                                 PAsearchSites.getSearchSiteName(siteNum) +
                                 "] " + releaseDate,
                                 score=score,
                                 lang=lang))

    # Full-movie (DVD) results; the site may not offer a /dvd search, so
    # any failure here is swallowed wholesale.
    try:
        dvdResults = HTML.ElementFromURL(
            PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + "/dvd")
        for dvdResult in dvdResults.xpath(
                '//div[contains(@class,"tlcItem playlistable_dvds")] | //div[@class="tlcDetails"]'
        ):
            titleNoFormatting = dvdResult.xpath(
                './/div[@class="tlcTitle"]/a')[0].get('title').strip()
            curID = dvdResult.xpath('.//a')[0].get('href').replace(
                '/', '_').replace('?', '!')
            try:
                releaseDate = parse(
                    dvdResult.xpath(
                        './/div[@class="tlcSpecs"]/span[@class="tlcSpecsDate"]/span[@class="tlcDetailsValue"]'
                    )[0].text_content().strip())
            except:
                try:
                    detailsPageElements = HTML.ElementFromURL(
                        PAsearchSites.getSearchBaseURL(siteNum) +
                        dvdResult.xpath('.//a[1]')[0].get('href'))
                    releaseDate = parse(
                        detailsPageElements.xpath('//*[@class="updatedDate"]')
                        [0].text_content().strip())
                except:
                    releaseDate = ''

            score = 100 - Util.LevenshteinDistance(searchTitle.lower(),
                                                   titleNoFormatting.lower())

            # BUGFIX: releaseDate may be '' — calling strftime on it raised
            # AttributeError and silently aborted the whole DVD loop via
            # the outer except.  Omit the year instead.
            yearSuffix = (" (" + releaseDate.strftime('%Y') +
                          ")") if releaseDate else ""

            results.Append(
                MetadataSearchResult(
                    id=curID + "|" + str(siteNum),
                    name=titleNoFormatting + yearSuffix + " - Full Movie [" +
                    PAsearchSites.getSearchSiteName(siteNum) + "]",
                    score=score,
                    lang=lang))
    except:
        pass
    return results
Пример #19
0
def update(metadata, siteID, movieGenres, movieActors):
    """Populate Plex metadata for a Jules Jordan (network) release.

    metadata.id holds the details-page URL encoded by search()
    ('/' -> '_', '?' -> '!'); the page is scraped for title, summary,
    collections, genres, release date, performers and artwork.
    Returns the updated metadata object.
    """
    Log('******UPDATE CALLED*******')

    # Reverse the URL encoding that search() applied to metadata.id.
    url = str(metadata.id).split("|")[0].replace('_', '/').replace(
        '?', '!').replace('/vids.html', '_vids.html')
    detailsPageElements = HTML.ElementFromURL(url)
    art = []
    metadata.collections.clear()
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Studio
    metadata.studio = 'Jules Jordan'

    # Title
    metadata.title = detailsPageElements.xpath(
        '//span[@class="title_bar_hilite"]')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//span[@class="update_description"]')[0].text_content().strip()

    # Tagline and Collection(s)
    tagline = PAsearchSites.getSearchSiteName(siteID).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # The DVD name is optional on the page; scenes without one are skipped.
    try:
        dvdName = detailsPageElements.xpath(
            '//span[@class="update_dvds"]')[0].text_content().replace(
                'Movie:', '').strip()
        metadata.collections.add(dvdName)
    except:
        pass

    # Genres
    for genreLink in detailsPageElements.xpath(
            '//span[@class="update_tags"]/a'):
        movieGenres.addGenre(genreLink.text_content().strip('\n').lower())

    # Release Date: prefer the visible text; some pages hide the date
    # inside an HTML comment shaped like "... OFF <date> D ...".
    date = detailsPageElements.xpath(
        '//div[@class="cell update_date"]')[0].text_content().strip()
    if date == '':
        try:
            date = str(
                detailsPageElements.xpath(
                    './/div[@class="cell update_date"]/comment()')[0]).strip()
            date = date[date.find('OFF') + 4:date.find('D',
                                                       date.find('OFF') +
                                                       4)].strip()
        except:
            pass
    if len(date) > 0:
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors (the GirlGirl sub-site uses a different page layout)
    if PAsearchSites.getSearchSiteName(siteID) == "GirlGirl":
        actors = detailsPageElements.xpath('//div[@class="item"]/span/div/a')
    else:
        actors = detailsPageElements.xpath(
            '//div[@class="backgroundcolor_info"]/span[@class="update_models"]/a'
        )
    for actorLink in actors:
        actorName = str(actorLink.text_content().strip())
        actorPageURL = actorLink.get("href")
        actorPage = HTML.ElementFromURL(actorPageURL)
        try:
            actorPhotoURL = actorPage.xpath(
                '//img[@class="model_bio_thumb stdimage thumbs target"]'
            )[0].get("src0_3x")
            if 'http' not in actorPhotoURL:
                actorPhotoURL = PAsearchSites.getSearchBaseURL(
                    siteID) + actorPhotoURL
        except:
            actorPhotoURL = ''
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters

    # Video trailer background image, scraped from the player script.
    try:
        bigScript = detailsPageElements.xpath(
            '//script[contains(text(),"df_movie")]')[0].text_content()
        alpha = bigScript.find('useimage = "') + 12
        omega = bigScript.find('";', alpha)
        background = bigScript[alpha:omega]
        if 'http' not in background:
            background = PAsearchSites.getSearchBaseURL(siteID) + background
        Log("background: " + background)
        art.append(background)
    except:
        pass

    # Slideshow of images from the Search page
    try:
        bigScript = detailsPageElements.xpath(
            '//script[contains(text(),"df_movie")]')[0].text_content()
        alpha = bigScript.find('setid:"') + 7
        omega = bigScript.find('",', alpha)
        setID = bigScript[alpha:omega]
        Log("setID: " + setID)
        searchPageElements = HTML.ElementFromURL(
            (PAsearchSites.getSearchSearchURL(siteID) +
             metadata.title).replace(' ', '%20'))
        posterUrl = searchPageElements.xpath('//img[@id="set-target-' + setID +
                                             '"]')[0].get('src')
        if 'http' not in posterUrl:
            posterUrl = PAsearchSites.getSearchBaseURL(siteID) + posterUrl
        Log("slideshow: " + posterUrl)
        art.append(posterUrl)
        for i in range(0, 7):
            try:
                # BUGFIX: 'src' + i concatenated an int to a str, raising a
                # TypeError that the except swallowed — so no slideshow
                # frame was ever collected. Convert the index explicitly.
                posterUrl = searchPageElements.xpath(
                    '//img[@id="set-target-' + setID + '"]')[0].get(
                        'src' + str(i) + '_1x')
                if 'http' not in posterUrl:
                    posterUrl = PAsearchSites.getSearchBaseURL(
                        siteID) + posterUrl
                Log("slideshow: " + posterUrl)
                art.append(posterUrl)
            except:
                pass
    except:
        pass

    # Photos page
    try:
        photoPageURL = detailsPageElements.xpath(
            '//div[@class="cell content_tab"]/a[text()="Photos"]')[0].get(
                'href')
        photoPageElements = HTML.ElementFromURL(photoPageURL)
        bigScript = photoPageElements.xpath(
            '//script[contains(text(),"var ptx")]')[0].text_content()
        ptx1600starts = bigScript.find('1600')
        ptx1600ends = bigScript.find('togglestatus', ptx1600starts)
        ptx1600 = bigScript[ptx1600starts:ptx1600ends]
        photos = []
        alpha = 0
        omega = 0
        imageCount = ptx1600.count('ptx["1600"][')
        Log("Photos found: " + str(imageCount))
        for _ in range(imageCount):
            alpha = ptx1600.find('{src: "', omega) + 7
            omega = ptx1600.find('"', alpha)
            posterUrl = ptx1600[alpha:omega]
            if 'http' not in posterUrl:
                posterUrl = PAsearchSites.getSearchBaseURL(siteID) + posterUrl
            photos.append(posterUrl)
        for x in range(10):
            # BUGFIX: randint(1, imageCount) could index one past the end
            # of the list (and never picked index 0); sample uniformly.
            art.append(random.choice(photos))
    except:
        pass

    # Vidcaps page
    try:
        capsPageURL = detailsPageElements.xpath(
            '//div[@class="cell content_tab"]/a[text()="Caps"]')[0].get('href')
        capsPageElements = HTML.ElementFromURL(capsPageURL)
        bigScript = capsPageElements.xpath(
            '//script[contains(text(),"var ptx")]')[0].text_content()
        ptxjpgstarts = bigScript.find('ptx["jpg"] = {};')
        ptxjpgends = bigScript.find('togglestatus', ptxjpgstarts)
        ptxjpg = bigScript[ptxjpgstarts:ptxjpgends]
        vidcaps = []
        alpha = 0
        omega = 0
        imageCount = ptxjpg.count('ptx["jpg"][')
        Log("Vidcaps found: " + str(imageCount))
        for _ in range(imageCount):
            alpha = ptxjpg.find('{src: "', omega) + 7
            omega = ptxjpg.find('"', alpha)
            posterUrl = ptxjpg[alpha:omega]
            if 'http' not in posterUrl:
                posterUrl = PAsearchSites.getSearchBaseURL(siteID) + posterUrl
            vidcaps.append(posterUrl)
        for x in range(10):
            # BUGFIX: same out-of-range randint as the Photos section.
            art.append(random.choice(vidcaps))
    except:
        pass

    j = 1
    Log("Artwork found: " + str(len(art)))
    for posterUrl in art:
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                img_file = urllib.urlopen(posterUrl)
                im = StringIO(img_file.read())
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                # NOTE(review): 'width > 1 or height > width' is almost
                # always true; kept to preserve existing behavior, but it
                # likely should mirror the plain 'width > 1' poster test
                # used by the other update() implementations — confirm.
                if width > 1 or height > width:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Preview(HTTP.Request(
                        posterUrl,
                        headers={
                            'Referer': 'http://www.google.com'
                        }).content,
                                                                sort_order=j)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Preview(HTTP.Request(
                        posterUrl,
                        headers={
                            'Referer': 'http://www.google.com'
                        }).content,
                                                            sort_order=j)
                j = j + 1
            except:
                pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Update Plex metadata for a Gamma (Algolia-backed) scene or movie.

    metadata.id is '<sceneID>|<siteNum>|<sceneType>|<date>' where sceneType
    is 'scenes' or 'movies'. All details come from the site's Algolia API.
    Returns the updated metadata object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneID = int(metadata_id[0])
    sceneType = metadata_id[2]
    # Scenes are keyed by clip_id, full movies by movie_id.
    sceneIDName = 'clip_id' if sceneType == 'scenes' else 'movie_id'
    sceneDate = metadata_id[3]

    apiKEY = getAPIKey(siteNum)

    url = PAsearchSites.getSearchSearchURL(
        siteNum
    ) + '?x-algolia-application-id=TSMKFA364Q&x-algolia-api-key=' + apiKEY
    data = getAlgolia(url, 'all_' + sceneType,
                      'filters=%s=%d' % (sceneIDName, sceneID),
                      PAsearchSites.getSearchBaseURL(siteNum))
    detailsPageElements = data[0]

    # All clips sharing this title, ordered by clip id, so the scene's
    # index within its parent movie can be determined.
    data = getAlgolia(url, 'all_scenes',
                      'query=%s' % detailsPageElements['url_title'],
                      PAsearchSites.getSearchBaseURL(siteNum))
    data = sorted(data, key=lambda i: i['clip_id'])
    scenesPagesElements = list(enumerate(data, 1))

    # Title: append ", Scene N" when this clip is part of a multi-scene set.
    title = None
    if sceneType == 'scenes' and len(scenesPagesElements) > 1:
        for idx, scene in scenesPagesElements:
            if scene['clip_id'] == sceneID:
                title = '%s, Scene %d' % (detailsPageElements['title'], idx)
                break
    if not title:
        title = detailsPageElements['title']

    metadata.title = title

    # Summary
    metadata.summary = detailsPageElements['description'].replace(
        '</br>', '\n').replace('<br>', '\n')

    # Studio
    metadata.studio = detailsPageElements['network_name']

    # Tagline and Collection(s)
    metadata.collections.clear()
    for collectionName in ['studio_name', 'serie_name']:
        if collectionName in detailsPageElements:
            metadata.collections.add(detailsPageElements[collectionName])
    # Titles like "Series: Part 1" / "Series #2" get the movie collection.
    if (':' in detailsPageElements['title'] or '#'
            in detailsPageElements['title']) and len(scenesPagesElements) > 1:
        if 'movie_title' in detailsPageElements:
            metadata.collections.add(detailsPageElements['movie_title'])

    # Release Date
    date_object = parse(sceneDate)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres (for movies, merge the categories of every contained clip)
    movieGenres.clearGenres()
    for genreLink in detailsPageElements['categories']:
        genreName = genreLink['name']
        if genreName:
            movieGenres.addGenre(genreName)

    if sceneType == 'movies':
        for idx, scene in scenesPagesElements:
            for genreLink in scene['categories']:
                genreName = genreLink['name']
                if genreName:
                    movieGenres.addGenre(genreName)

    # Actors: list female performers before male performers.
    movieActors.clearActors()
    female = []
    male = []
    for actorLink in detailsPageElements['actors']:
        actorName = actorLink['name']

        actorData = getAlgolia(url, 'all_actors',
                               'filters=actor_id=' + actorLink['actor_id'],
                               PAsearchSites.getSearchBaseURL(siteNum))[0]
        if 'pictures' in actorData and actorData['pictures']:
            # Keys sort so the last one names the largest available size.
            max_quality = sorted(actorData['pictures'].keys())[-1]
            actorPhotoURL = 'https://images-fame.gammacdn.com/actors' + actorData[
                'pictures'][max_quality]
        else:
            actorPhotoURL = ''

        if actorLink['gender'] == 'female':
            female.append((actorName, actorPhotoURL))
        else:
            male.append((actorName, actorPhotoURL))

    for actorName, actorPhotoURL in female + male:
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []

    if not PAsearchSites.getSearchBaseURL(siteNum).endswith(
            ('girlsway.com', 'puretaboo.com')):
        art.append(
            'https://images-fame.gammacdn.com/movies/{0}/{0}_{1}_front_400x625.jpg'
            .format(detailsPageElements['movie_id'],
                    detailsPageElements['url_title'].lower().replace('-',
                                                                     '_')))
        if 'url_movie_title' in detailsPageElements:
            art.append(
                'https://images-fame.gammacdn.com/movies/{0}/{0}_{1}_front_400x625.jpg'
                .format(
                    detailsPageElements['movie_id'],
                    detailsPageElements['url_movie_title'].lower().replace(
                        '-', '_')))

    if 'pictures' in detailsPageElements and detailsPageElements['pictures']:
        max_quality = detailsPageElements['pictures']['nsfw']['top'].keys()[0]
        # BUGFIX: the quality key comes from pictures['nsfw']['top'], so the
        # image path must be read from that same nested dict; indexing the
        # top-level 'pictures' dict with it raised KeyError and lost the
        # image (the error was not caught here).
        pictureURL = ('https://images-fame.gammacdn.com/movies/' +
                      detailsPageElements['pictures']['nsfw']['top']
                      [max_quality])

        if sceneType == 'movies':
            art.append(pictureURL)
        else:
            # For a single scene the screenshot is the best poster candidate.
            art.insert(0, pictureURL)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    return metadata
Пример #21
0
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Update Plex metadata for a Mylf-network scene.

    metadata.id is '<sceneName>|<siteNum>|<releaseDate>|<contentName>';
    details come from the JSON state embedded in the scene page. Genres are
    inferred from the sub-site name since the API exposes none.
    Returns the updated metadata object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneName = metadata_id[0]
    releaseDate = metadata_id[2]
    contentName = metadata_id[3]

    detailsPageElements = getJSONfromPage(
        PAsearchSites.getSearchSearchURL(siteNum) +
        sceneName)[contentName][sceneName]

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    metadata.summary = detailsPageElements['description']

    # Studio
    metadata.studio = 'Mylf'

    # Tagline and Collection(s)
    metadata.collections.clear()
    if 'site' in detailsPageElements:
        subSite = detailsPageElements['site']['name']
    else:
        subSite = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = subSite
    if Prefs['collections_addsitename']:
        metadata.collections.add(subSite)

    # Release Date
    if releaseDate:
        date_object = parse(releaseDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements['models']
    for actorLink in actors:
        actorID = actorLink['modelId']
        actorName = actorLink['modelName']
        actorPhotoURL = ''

        actorData = getJSONfromPage(
            '%s/models/%s' %
            (PAsearchSites.getSearchBaseURL(siteNum), actorID))
        if actorData:
            actorPhotoURL = actorData['modelsContent'][actorID]['img']

        movieActors.addActor(actorName, actorPhotoURL)

    # Genres: every scene gets the base genres plus sub-site specific ones.
    movieGenres.clearGenres()
    genres = ['MILF', 'Mature']

    # Sub-site specific genres keyed by lower-cased sub-site name; this
    # replaces the previous nine-branch if/elif chain.
    subSiteGenres = {
        'mylfboss': ['Office', 'Boss'],
        'mylfblows': ['B*****b'],
        'milfty': ['Cheating'],
        'mom drips': ['Creampie'],
        'milf body': ['Gym', 'Fitness'],
        'lone milf': ['Solo'],
        'full of joi': ['JOI'],
        'mylfed': ['Lesbian', 'Girl on Girl', 'GG'],
        'mylfdom': ['BDSM'],
    }
    for genreName in subSiteGenres.get(subSite.lower(), []):
        movieGenres.addGenre(genreName)

    # BUGFIX: this comparison was case-sensitive ("!= 'Mylfed'") while every
    # other sub-site check in this function normalized case; normalize it
    # too so a casing difference cannot mislabel a Mylfed duo as Threesome.
    if len(actors) > 1 and subSite.lower() != 'mylfed':
        genres.append('Threesome')

    for genreName in genres:
        movieGenres.addGenre(genreName)

    # Posters
    art = [detailsPageElements['img']]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    return metadata
Пример #22
0
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Update Plex metadata for a Score Group scene.

    metadata.id holds the PAutils-encoded scene URL; the page is scraped
    for title, summary, release date, performers, genres and artwork.
    Returns the updated metadata object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchSearchURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//h1')[0].text_content().strip()

    # Summary: several page layouts exist; the last matching node wins
    # (each inner loop breaks after its first element, but later xpaths
    # overwrite earlier ones).
    summary_xpaths = [
        '//div[@class="p-desc"]', '//div[contains(@class, "desc")]'
    ]

    for xpath in summary_xpaths:
        for summary in detailsPageElements.xpath(xpath):
            metadata.summary = summary.text_content().replace(
                'Read More »', '').strip()
            break

    # Studio
    metadata.studio = 'Score Group'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    if Prefs['collections_addsitename']:
        metadata.collections.add(metadata.tagline)

    # Release Date: the date is expected in the second "value" span.
    # BUGFIX: the old 'if date:' guard still indexed date[1], so a page
    # with a single span raised an uncaught IndexError; require two.
    date = detailsPageElements.xpath('//div/span[@class="value"]')
    if len(date) > 1:
        date = date[1].text_content().strip()
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors (the site does not expose performer photos here)
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//div/span[@class="value"]/a'):
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''

        movieActors.addActor(actorName, actorPhotoURL)

    # Site 1344 scenes always credit Christy Marks, who is not listed.
    if siteNum == 1344:
        movieActors.addActor('Christy Marks', '')

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="mb-3"]/a'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Posters/Background
    art = []

    # The player poster is embedded in inline JavaScript.
    match = re.search(r'posterImage: \'(.*)\'', req.text)
    if match:
        art.append(match.group(1))

    xpaths = [
        '//div[contains(@class, "thumb")]/img/@src',
        '//div[contains(@class, "p-image")]/a/img/@src',
        '//div[contains(@class, "dl-opts")]/a/img/@src',
        '//div[contains(@class, "p-photos")]/div/div/a/@href',
        '//div[contains(@class, "gallery")]/div/div/a/@href'
    ]

    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            if not poster.startswith('http'):
                poster = 'http:' + poster

            # 'shared-bits' images are site chrome, not scene artwork.
            if 'shared-bits' not in poster:
                art.append(poster)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    return metadata
Пример #23
0
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Search data18-style movie listings.

    Combines three sources: a direct movie-ID lookup (when the query starts
    with a numeric ID > 100), the site's own search page, and a Google site
    search. Results tied at the generic score of 80 are demoted to 79 when
    more than one exists, so an exact match always sorts first.
    Appends MetadataSearchResult items to `results` and returns it.
    """
    searchResults = []
    siteResults = []
    temp = []
    count = 0

    # A leading number > 100 is treated as a movie ID for a direct lookup.
    sceneID = None
    splited = searchTitle.split(' ')
    if unicode(splited[0], 'UTF-8').isdigit():
        sceneID = splited[0]

        if int(sceneID) > 100:
            searchTitle = searchTitle.replace(sceneID, '', 1).strip()
            movieURL = '%s/movies/%s' % (
                PAsearchSites.getSearchBaseURL(siteNum), sceneID)
            searchResults.append(movieURL)

    encodedTitle = searchTitle.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum),
                          encodedTitle)
    req = PAutils.HTTPRequest(searchURL,
                              headers={'Referer': 'http://www.data18.com'})
    searchPageElements = HTML.ElementFromString(req.text)

    for searchResult in searchPageElements.xpath(
            '//a[contains(@href, "movies")]//parent::div[contains(@style, "float: left; padding")]'
    ):
        movieURL = searchResult.xpath('.//*[img]/@href')[0]
        # The trailing URL segment is the movie ID.
        urlID = re.sub(r'.*/', '', movieURL)

        if movieURL not in searchResults:
            titleNoFormatting = PAutils.parseTitle(
                searchResult.xpath('.//*[contains(@href, "movies")]')
                [1].text_content(), siteNum)
            curID = PAutils.Encode(movieURL)
            siteResults.append(movieURL)

            date = searchResult.text

            if date and not date == 'unknown':
                try:
                    releaseDate = datetime.strptime(
                        date, '%Y%m%d').strftime('%Y-%m-%d')
                except:
                    releaseDate = ''
            else:
                # Fall back to the caller-supplied date when the site has none.
                releaseDate = parse(searchDate).strftime(
                    '%Y-%m-%d') if searchDate else ''
            displayDate = releaseDate if date else ''

            if sceneID == urlID:
                score = 100
            elif searchDate and displayDate:
                score = 80 - Util.LevenshteinDistance(searchDate, releaseDate)
            else:
                score = 80 - Util.LevenshteinDistance(
                    searchTitle.lower(), titleNoFormatting.lower())

            result = MetadataSearchResult(
                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                name='%s %s' % (titleNoFormatting, displayDate),
                score=score,
                lang=lang)
            if score == 80:
                # Hold ambiguous 80-scored hits back for possible demotion.
                count += 1
                temp.append(result)
            else:
                results.Append(result)

    googleResults = PAutils.getFromGoogleSearch(searchTitle, siteNum)
    for movieURL in googleResults:
        if ('/movies/' in movieURL and '.html' not in movieURL
                and movieURL not in searchResults
                and movieURL not in siteResults):
            searchResults.append(movieURL)

    for movieURL in searchResults:
        req = PAutils.HTTPRequest(movieURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', movieURL)

        # The studio/site name may appear under any of three labels.
        siteName = ''
        for label in ('Network', 'Studio', 'Site'):
            try:
                siteName = detailsPageElements.xpath(
                    '//i[contains(., "%s")]//preceding-sibling::a[1]' % label
                )[0].text_content().strip()
                break
            except:
                pass

        titleNoFormatting = PAutils.parseTitle(
            detailsPageElements.xpath('//h1')[0].text_content(), siteNum)
        curID = PAutils.Encode(movieURL)

        try:
            # BUGFIX: text_content() returns a plain string, so the doubled
            # .text_content() call always raised AttributeError and the
            # release date was silently discarded.
            date = detailsPageElements.xpath('//p[contains(., "Release")]')[
                0].text_content().split(':')[2].strip()
        except:
            date = ''

        if date and not date == 'unknown':
            releaseDate = parse(date).strftime('%Y-%m-%d')
        else:
            releaseDate = parse(searchDate).strftime(
                '%Y-%m-%d') if searchDate else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif searchDate and displayDate:
            score = 80 - Util.LevenshteinDistance(searchDate, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchTitle.lower(),
                                                  titleNoFormatting.lower())

        result = MetadataSearchResult(
            id='%s|%d|%s' % (curID, siteNum, releaseDate),
            name='%s [%s] %s' % (titleNoFormatting, siteName, displayDate),
            score=score,
            lang=lang)
        if score == 80:
            count += 1
            temp.append(result)
        else:
            results.Append(result)

    for result in temp:
        # Demote tied 80-scored results to 79 when more than one exists.
        score = 79 if count > 1 and result.score == 80 else result.score
        results.Append(
            MetadataSearchResult(id=result.id,
                                 name=result.name,
                                 score=score,
                                 lang=lang))

    return results
Пример #24
0
def update(metadata, siteNum, movieGenres, movieActors):
    """Update Plex metadata for a Bang.com scene via the site's API.

    metadata.id starts with the scene identifier; all details and artwork
    URLs come from the API response. Returns the updated metadata object.
    """
    sceneID = str(metadata.id).split('|')[0]

    # Fetch the scene document from the search API.
    apiResponse = getDataFromAPI(
        PAsearchSites.getSearchSearchURL(siteNum), 'identifier', sceneID)
    scene = apiResponse['hits']['hits'][0]['_source']

    # Title
    metadata.title = scene['name']

    # Summary
    metadata.summary = scene['description']

    # Studio
    metadata.studio = scene['studio']['name'].title()

    # Tagline and Collection(s)
    metadata.collections.add(metadata.studio)
    seriesName = scene['series']['name']
    if seriesName:
        metadata.collections.add(seriesName.title())

    # Release Date
    metadata.originally_available_at = datetime.strptime(
        scene['releaseDate'], '%Y-%m-%d')
    metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    for performer in scene['actors']:
        photoURL = 'https://i.bang.com/pornstars/%d.jpg' % performer['id']
        movieActors.addActor(performer['name'], photoURL)

    # Genres
    movieGenres.clearGenres()
    for category in scene['genres']:
        movieGenres.addGenre(category['name'])

    # Posters: DVD front cover first, then every screenshot.
    dvdID = scene['dvd']['id']
    art = ['https://i.bang.com/covers/%d/front.jpg' % dvdID]
    art.extend('https://i.bang.com/screenshots/%d/movie/1/%d.jpg' %
               (dvdID, shot['screenId']) for shot in scene['screenshots'])

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download the image so it can be classified by its dimensions.
        try:
            image = PAutils.HTTPRequest(posterUrl)
            buf = StringIO(image.content)
            width, height = Image.open(buf).size
            if width > 1:
                # Usable as a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100 and width > height:
                # Wide image: usable as background art
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass

    return metadata
Пример #25
0
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Fill in Plex metadata for a JavBus title page.

    The first '|'-separated segment of metadata.id is the PAutils-encoded
    scene URL (or a URL suffix relative to the site's search URL).
    Scrapes studio, title, tagline, release date, genres, actors and
    artwork, then returns the updated metadata object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])

    # The encoded ID may be a relative path; prepend the site search URL
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchSearchURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)
    # JAV ID is the last path segment of the scene URL
    JAVID = sceneURL.rsplit('/', 1)[1]

    # Studio
    javStudio = detailsPageElements.xpath(
        '//p/a[contains(@href, "/studio/")]')[0].text_content().strip()
    metadata.studio = javStudio

    # Title: strip the site suffix; prefix the studio when the ID is numeric
    javTitle = detailsPageElements.xpath(
        '//head/title')[0].text_content().strip().replace(' - JavBus', '')
    if JAVID.replace('-', '').replace('_', '').replace(' ', '').isdigit():
        javTitle = javStudio + ' ' + javTitle
    metadata.title = javTitle

    # Tagline: 'Label: X, Series: Y' built from whichever links are present
    data = {}

    label = detailsPageElements.xpath('//p/a[contains(@href, "/label/")]')
    if label:
        data['Label'] = label[0].text_content().strip()

    series = detailsPageElements.xpath('//p/a[contains(@href, "/series/")]')
    if series:
        data['Series'] = series[0].text_content().strip()

    metadata.tagline = ', '.join(
        ['%s: %s' % (key, value) for key, value in data.items()])

    # Release Date
    date = detailsPageElements.xpath('//div[@class="col-md-3 info"]/p[2]'
                                     )[0].text_content().strip().replace(
                                         'Release Date: ', '')
    date_object = datetime.strptime(date, '%Y-%m-%d')
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres -- clear stale entries first, matching the sibling agents
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//span[@class="genre"]/a[contains(@href, "/genre/")]'):
        genreName = genreLink.text_content().lower().strip()
        movieGenres.addGenre(genreName)

    metadata.collections.add('Japan Adult Video')

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//a[@class="avatar-box"]'):
        fullActorName = actorLink.text_content().strip()

        # Look the photo up by the actor's name in the img title attribute
        actorPhotoURL = detailsPageElements.xpath(
            '//a[@class="avatar-box"]/div[@class="photo-frame"]/img[contains(@title, "%s")]/@src'
            % (fullActorName))[0]
        if not actorPhotoURL.startswith('http'):
            actorPhotoURL = PAsearchSites.getSearchBaseURL(
                siteNum) + actorPhotoURL

        # 'nowprinting.gif' is the site's placeholder for a missing photo
        if actorPhotoURL.rsplit('/', 1)[1] == 'nowprinting.gif':
            actorPhotoURL = ''

        movieActors.addActor(fullActorName, actorPhotoURL)

    # Posters: full covers plus sample thumbnails
    xpaths = [
        '//a[contains(@href, "/cover/")]/@href',
        '//a[@class="sample-box"]/div/img/@src',
    ]
    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            if not poster.startswith('http'):
                poster = PAsearchSites.getSearchBaseURL(siteNum) + poster

            art.append(poster)

    # Derive the small thumbnail URL from the full cover URL
    coverImage = detailsPageElements.xpath(
        '//a[contains(@href, "/cover/")]/@href')
    coverImageCode = coverImage[0].rsplit('/',
                                          1)[1].split('.')[0].split('_')[0]
    imageHost = coverImage[0].rsplit('/', 2)[0]
    coverImage = imageHost + '/thumb/' + coverImageCode + '.jpg'
    # The images.* CDN uses '/thumbs/' instead of '/thumb/'
    if coverImage.count('/images.') == 1:
        coverImage = coverImage.replace('thumb', 'thumbs')

    if not coverImage.startswith('http'):
        coverImage = PAsearchSites.getSearchBaseURL(siteNum) + coverImage

    art.append(coverImage)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100 and idx > 1:
                    # Item is an art item; the first entry (the cover)
                    # stays poster-only
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Fill in Plex metadata for a Digital Playground title.

    The first '|'-separated segment of metadata.id is the details-page
    URL with '/' encoded as '_' and '?' encoded as '!'.  Handles three
    page layouts: series episodes, blockbuster movies (whole movie or a
    single scene), and everything else (Flixxx, Raw Cuts, ...).
    Returns the updated metadata object.
    """
    detailsPageURL = str(metadata.id).split("|")[0].replace('_', '/').replace(
        "!", "?")
    detailsPageElements = HTML.ElementFromURL(detailsPageURL)
    thisPage = detailsPageElements.xpath(
        '//a[contains(text(),"trailer")]')[0].get('href')

    metadata.collections.clear()
    metadata.studio = "Digital Playground"
    art = []
    # Safe defaults: some branches below assign these only inside
    # try/except blocks, which previously left them unbound (NameError
    # at metadata.summary/len(actors)) when the xpath lookups failed.
    summary = ''
    actors = []

    # Title
    title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    #Determine what we're looking for and gather the information as needed
    if "/series/" in detailsPageURL:
        # This is an episode in a Series
        seriesInfoPageElements = HTML.ElementFromURL(
            PAsearchSites.getSearchBaseURL(siteID) + detailsPageElements.xpath(
                '//a[contains(text(),"info")]')[0].get("href"))
        seriesTrailerPageElements = HTML.ElementFromURL(
            PAsearchSites.getSearchBaseURL(siteID) +
            seriesInfoPageElements.xpath('//a[@class="watch-now"]')[0].get(
                "href"))
        art.append(
            seriesTrailerPageElements.xpath('//div[@class="trailer-player "]')
            [0].get('data-poster-image'))
        tagline = "Series: " + seriesInfoPageElements.xpath(
            '//h1')[0].text_content().strip()
        summary = seriesInfoPageElements.xpath(
            '//div[@class="overview"]//p')[0].text_content().strip()
        genres = detailsPageElements.xpath(
            '//ul[@id="movie-info-format" and last()]/li/div/a')
        try:
            # Series needs to define the Episode Number and pull only actors from that episode
            actors = detailsPageElements.xpath(
                '//a[@href="' + thisPage +
                '" and last()]//following-sibling::div[@class="model-names-wrapper"]/span[@class="model-names"]/a'
            )
            if len(actors) == 0:
                raise
        except:
            # I could put a backup plan here to pull actors from the Series Info page...
            pass

    elif "/movies/" in detailsPageURL:
        movieInfoPageElements = HTML.ElementFromURL(
            PAsearchSites.getSearchBaseURL(siteID) + detailsPageElements.xpath(
                '//a[contains(text(),"info")]')[0].get("href"))
        tagline = "Blockbuster"
        summary = movieInfoPageElements.xpath(
            '//div[@class="overview"]//p')[0].text_content().strip()
        genres = movieInfoPageElements.xpath('//div[@class="box-tag"]/a')
        if "sceneid" in detailsPageURL:
            # This is an individual scene from a Blockbuster
            metadata.collections.add(title)
            k = detailsPageURL.rfind("=")
            sceneID = detailsPageURL[k + 1:].strip()
            sceneImg = movieInfoPageElements.xpath('//img[@alt="' + title +
                                                   ' - Scene ' + sceneID +
                                                   '"]')[0].get('data-srcset')
            k = sceneImg.rfind("/")
            art.append("https:" + sceneImg[:k + 1] + "1290x726_1.jpg")
            title = title + ": Scene " + sceneID
            try:
                # Pull the actors for just that one scene
                actors = movieInfoPageElements.xpath(
                    '//h4[text()="Scene ' + sceneID +
                    ': "]//following-sibling::a')
                if len(actors) == 0:
                    raise
            except:
                pass
        else:
            # This is a full Blockbuster movie
            try:
                actors = movieInfoPageElements.xpath(
                    '//div[@class="box-card model  "]/div[@class="title-bar"]/div[@class="title-text"]/div/h4/a'
                )
                if len(actors) == 0:
                    raise
            except:
                pass
            # One artwork entry per scene preview image
            sceneImgs = movieInfoPageElements.xpath(
                '//div[@class="box-card scene"]/div[@class="preview-image"]/a/img'
            )
            for sceneImg in sceneImgs:
                imgSrc = sceneImg.get('data-srcset')
                k = imgSrc.rfind("/")
                art.append("https:" + imgSrc[:k + 1] + "1290x726_1.jpg")
        art.append("http:" + movieInfoPageElements.xpath(
            '//img[@id="front-cover-hd"]')[0].get('src'))
        art.append("http:" + movieInfoPageElements.xpath(
            '//img[@id="back-cover-hd"]')[0].get('src'))
    else:
        # This must be a Flixxx or Raw Cuts or something else
        tagline = detailsPageElements.xpath(
            '//a[contains(@class,"full-scene-button")]')[0].text_content(
            ).strip()
        genres = detailsPageElements.xpath(
            '//ul[@id="movie-info-format" and last()]/li/div/a')
        try:
            # Sometimes it just doesn't have a synopsis...
            summary = detailsPageElements.xpath(
                '//span[text()="SYNOPSIS"]//following::span')[0].text_content(
                ).strip()
        except:
            pass

        try:
            actors = detailsPageElements.xpath(
                '//span[@class="subtitle" and text()="STARRING"]//following::span[1]//a'
            )
            if len(actors) == 0:
                raise
        except:
            # Search the site for the title and take its actor links instead
            Log("Fallback plan for Actors reached")
            searchPageElements = HTML.ElementFromURL(
                PAsearchSites.getSearchSearchURL(siteID) + urllib.quote(title))
            actors = searchPageElements.xpath('//h4[contains(text(),"' +
                                              title +
                                              '")]//following-sibling::a')
        art.append(
            detailsPageElements.xpath('//div[@class="trailer-player "]')
            [0].get('data-poster-image'))

    # Trailer poster is artwork for every layout
    art.append(
        detailsPageElements.xpath('//div[@class="trailer-player "]')[0].get(
            'data-poster-image'))
    tagline = "DP " + tagline

    metadata.collections.add(tagline)
    metadata.tagline = tagline
    metadata.title = title
    metadata.summary = summary

    # Genres
    movieGenres.clearGenres()
    Log("Genres found: " + str(len(genres)))
    if len(genres) > 0:
        for genre in genres:
            genreName = str(genre.text_content().lower().strip())
            movieGenres.addGenre(genreName)

    # Date: layout differs per page type, so try both locations
    try:
        releaseDate = detailsPageElements.xpath(
            '//ul[contains(@class,"movie-details")]//span')[0].text_content()
    except:
        releaseDate = detailsPageElements.xpath(
            './/div[@class="release-info"]/div[@class="info-left"]/span[2]'
        )[0].text_content().strip()

    if len(releaseDate) > 0:
        date_object = datetime.strptime(releaseDate, '%m-%d-%Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    Log("Actors found: " + str(len(actors)))
    if len(actors) > 0:
        for actorLink in actors:
            actorPageURL = actorLink.get("href")
            if "/model/" in actorPageURL:  # dirty hack to filter out the extra actor I was getting that was named for some other scene; actual problem is probably just my xpath search for actors above
                actorName = str(actorLink.text_content().strip())
                actorPage = HTML.ElementFromURL(
                    PAsearchSites.getSearchBaseURL(siteID) + actorPageURL)
                actorPhotoURL = "https:" + actorPage.xpath(
                    '//div[@class="preview-image"]//img')[0].get("src")
                movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    j = 1
    Log("Artwork found: " + str(len(art)))
    for posterUrl in art:
        if not posterAlreadyExists(posterUrl, metadata):
            #Download image file for analysis
            try:
                img_file = urllib.urlopen(posterUrl)
                im = StringIO(img_file.read())
                resized_image = Image.open(im)
                width, height = resized_image.size
                #Add the image proxy items to the collection
                if (width > 1):
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Preview(HTTP.Request(
                        posterUrl,
                        headers={
                            'Referer': 'http://www.google.com'
                        }).content,
                                                                sort_order=j)
                if (width > 100):
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Preview(HTTP.Request(
                        posterUrl,
                        headers={
                            'Referer': 'http://www.google.com'
                        }).content,
                                                            sort_order=j)
                j = j + 1
            except:
                pass

    return metadata
def search(results, lang, siteNum, searchData):
    """Search Femdom Empire for a scene.

    Tries the advanced search first, then a table of known difficult
    scenes, and finally the site's standard search endpoint.  Appends
    MetadataSearchResult entries to *results* and returns it.
    """
    # Advanced Search
    req = PAutils.HTTPRequest(
        PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded)
    searchResults = HTML.ElementFromString(req.text)
    for searchResult in searchResults.xpath(
            '//div[contains(@class, "item-info")]'):
        titleNoFormatting = searchResult.xpath(
            './/a')[0].text_content().strip()
        curID = PAutils.Encode(searchResult.xpath('.//a/@href')[0])
        releaseDate = parse(
            searchResult.xpath('.//span[@class="date"]')
            [0].text_content().strip()).strftime('%Y-%m-%d')

        # Score by date when the filename supplied one, else by title
        if searchData.date:
            score = 100 - Util.LevenshteinDistance(searchData.date,
                                                   releaseDate)
        else:
            score = 100 - Util.LevenshteinDistance(searchData.title.lower(),
                                                   titleNoFormatting.lower())

        results.Append(
            MetadataSearchResult(id='%s|%d' % (curID, siteNum),
                                 name='%s [Femdom Empire] %s' %
                                 (titleNoFormatting, releaseDate),
                                 score=score,
                                 lang=lang))

    # Difficult Scenes: manual overrides that the search engine misses
    if searchData.title in manualMatch:
        item = manualMatch[searchData.title]
        curID = PAutils.Encode(item['curID'])

        results.Append(
            MetadataSearchResult(id='%s|%d' % (curID, siteNum),
                                 name=item['name'],
                                 score=101,
                                 lang=lang))

    if results:
        return results

    # Standard Search (fallback when nothing matched above)
    else:
        req = PAutils.HTTPRequest(
            PAsearchSites.getSearchBaseURL(siteNum) +
            '/tour/search.php?query=' + searchData.encoded)
        searchResults = HTML.ElementFromString(req.text)
        for searchResult in searchResults.xpath(
                '//div[contains(@class, "item-info")]'):
            titleNoFormatting = searchResult.xpath(
                './/a')[0].text_content().strip()
            scenePage = searchResult.xpath('.//a/@href')[0]
            curID = PAutils.Encode(scenePage)
            releaseDate = parse(
                searchResult.xpath('.//span[@class="date"]')
                [0].text_content().strip()).strftime('%Y-%m-%d')

            if searchData.date:
                score = 100 - Util.LevenshteinDistance(searchData.date,
                                                       releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(
                    searchData.title.lower(), titleNoFormatting.lower())

            # Append inside the loop so every match is kept; previously
            # this sat outside the loop and only the last match survived
            # (and it raised NameError when there were no matches at all)
            results.Append(
                MetadataSearchResult(id='%s|%d' % (curID, siteNum),
                                     name='%s [Femdom Empire] %s' %
                                     (titleNoFormatting, releaseDate),
                                     score=score,
                                     lang=lang))

    return results
def search(results, lang, siteNum, searchData):
    """Search Adult Empire (empirestores) for a movie.

    Combines a direct movie-ID lookup (when the filename starts with a
    number greater than 100), the site's own search, and a Google-search
    fallback.  Whole movies and their individual scenes are both offered
    as results; score-80 results are collected in *temp* and demoted to
    79 at the end when there is more than one of them.  Appends to and
    returns *results*.
    """
    searchResults = []
    siteResults = []
    # Holds score-80 candidates so ties can be demoted after the scan
    temp = []
    directID = False
    count = 0

    # A leading all-digit token > 100 in the title is treated as a movie ID
    sceneID = None
    parts = searchData.title.split()
    if unicode(parts[0], 'UTF-8').isdigit():
        sceneID = parts[0]

        if int(sceneID) > 100:
            searchData.title = searchData.title.replace(sceneID, '', 1).strip()
            movieURL = '%s/%s' % (PAsearchSites.getSearchBaseURL(siteNum),
                                  sceneID)
            searchResults.append(movieURL)
            directID = True

    searchData.encoded = searchData.title.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum),
                          searchData.encoded)
    req = PAutils.HTTPRequest(
        searchURL, headers={'Referer': 'http://www.data18.empirestores.co'})
    searchPageElements = HTML.ElementFromString(req.text)
    if not directID:
        for searchResult in searchPageElements.xpath(
                '//div[@class="product-card"]'):
            movieURL = '%s%s' % (
                PAsearchSites.getSearchBaseURL(siteNum),
                searchResult.xpath(
                    './div[@class="boxcover-container"]/a/@href')[0])
            urlID = searchResult.xpath(
                './div[@class="boxcover-container"]/a/@href')[0].split("/")[1]
            if movieURL not in searchResults:
                titleNoFormatting = PAutils.parseTitle(
                    searchResult.xpath(
                        './div[@class="product-details"]/div/a/text()')
                    [0].strip(), siteNum)
                curID = PAutils.Encode(movieURL)
                siteResults.append(movieURL)

                # NOTE(review): releaseDate/displayDate are first assigned
                # further down (inside the score > 70 block), so on the
                # first iteration the date branch here would raise
                # NameError; later iterations reuse the previous movie's
                # date.  Verify whether this elif path is ever taken.
                if sceneID == urlID:
                    score = 100
                elif searchData.date and displayDate:
                    score = 80 - Util.LevenshteinDistance(
                        searchData.date, releaseDate)
                else:
                    score = 80 - Util.LevenshteinDistance(
                        searchData.title.lower(), titleNoFormatting.lower())

                if score > 70:
                    # Promising match: open the movie page for date/studio
                    sceneURL = PAutils.Decode(curID)
                    req = PAutils.HTTPRequest(sceneURL)
                    detailsPageElements = HTML.ElementFromString(req.text)

                    # Find date on movie specific page
                    date = detailsPageElements.xpath(
                        '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Released:")]/text()'
                    )[0].strip()
                    if date and not date == 'unknown':
                        try:
                            releaseDate = datetime.strptime(
                                date, '%b %d, %Y').strftime('%Y-%m-%d')
                        except:
                            releaseDate = ''
                    else:
                        # Fall back to the date parsed from the filename
                        releaseDate = searchData.dateFormat(
                        ) if searchData.date else ''
                    displayDate = releaseDate if date else ''

                    # Studio
                    try:
                        studio = detailsPageElements.xpath(
                            '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Studio:")]/a/text()'
                        )[0].strip()
                    except:
                        studio = ''
                    # Re-score now that the real release date is known
                    if sceneID == urlID:
                        score = 100
                    elif searchData.date and displayDate:
                        score = 80 - Util.LevenshteinDistance(
                            searchData.date, releaseDate)
                    else:
                        score = 80 - Util.LevenshteinDistance(
                            searchData.title.lower(),
                            titleNoFormatting.lower())

                    # Exact score 80 means "ambiguous" -> buffer in temp
                    if score == 80:
                        count += 1
                        temp.append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s [%s] %s' %
                                (titleNoFormatting, studio, displayDate),
                                score=score,
                                lang=lang))
                    else:
                        results.Append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s [%s] %s' %
                                (titleNoFormatting, studio, displayDate),
                                score=score,
                                lang=lang))

                    # Split Scenes
                    scenes = detailsPageElements.xpath(
                        '//div[@class="product-details-container"]/div[@class="container"]/div[@class="row"]'
                    )
                    # Python 2 integer division: each scene spans two rows
                    sceneCount = (len(scenes) - 1) / 2
                    for sceneNum in range(0, sceneCount):
                        section = 'Scene %d' % (sceneNum + 1)
                        actorNames = ', '.join(
                            detailsPageElements.xpath(
                                '//div[@class="container"]/div[@class="row"][./div[@class="col-sm-6 text-right text-left-xs m-b-1"]][%d]/div[2]/div/a/text()'
                                % (sceneNum + 1)))
                        if score == 80:
                            count += 1
                            temp.append(
                                MetadataSearchResult(
                                    id='%s|%d|%s|%d' %
                                    (curID, siteNum, releaseDate,
                                     sceneNum + 1),
                                    name='%s/#%d[%s][%s] %s' %
                                    (titleNoFormatting, sceneNum + 1,
                                     actorNames, studio, displayDate),
                                    score=score,
                                    lang=lang))
                        else:
                            results.Append(
                                MetadataSearchResult(
                                    id='%s|%d|%s|%d' %
                                    (curID, siteNum, releaseDate,
                                     sceneNum + 1),
                                    name='%s/#%d[%s][%s] %s' %
                                    (titleNoFormatting, sceneNum + 1,
                                     actorNames, studio, displayDate),
                                    score=score,
                                    lang=lang))
                else:
                    # Weak match: report without opening the movie page
                    if score == 80:
                        count += 1
                        temp.append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s %s' %
                                (titleNoFormatting, displayDate),
                                score=score,
                                lang=lang))
                    else:
                        results.Append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s %s' %
                                (titleNoFormatting, displayDate),
                                score=score,
                                lang=lang))

    # Google fallback: collect movie URLs not already seen above
    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for movieURL in googleResults:
        if ('/movies/' in movieURL and '.html' not in movieURL
                and movieURL not in searchResults
                and movieURL not in siteResults):
            searchResults.append(movieURL)

    # Visit the direct-ID and Google-found movie pages
    for movieURL in searchResults:
        req = PAutils.HTTPRequest(movieURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', movieURL)
        titleNoFormatting = PAutils.parseTitle(
            detailsPageElements.xpath('//h1/text()')[0].strip(), siteNum)
        curID = PAutils.Encode(movieURL)

        date = detailsPageElements.xpath(
            '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Released:")]/text()'
        )[0].strip()
        if date and not date == 'unknown':
            try:
                # NOTE(review): format here lacks the comma used in the
                # branch above ('%b %d %Y' vs '%b %d, %Y') -- confirm
                # which one the site actually emits
                releaseDate = datetime.strptime(
                    date, '%b %d %Y').strftime('%Y-%m-%d')
            except:
                releaseDate = ''
        else:
            releaseDate = searchData.dateFormat() if searchData.date else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif searchData.date and displayDate:
            score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchData.title.lower(),
                                                  titleNoFormatting.lower())

        # Studio
        try:
            studio = detailsPageElements.xpath(
                '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Studio:")]/a/text()'
            )[0].strip()
        except:
            studio = ''

        if score == 80:
            count += 1
            temp.append(
                MetadataSearchResult(id='%s|%d|%s' %
                                     (curID, siteNum, releaseDate),
                                     name='%s [%s] %s' %
                                     (titleNoFormatting, studio, displayDate),
                                     score=score,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id='%s|%d|%s' %
                                     (curID, siteNum, releaseDate),
                                     name='%s [%s] %s' %
                                     (titleNoFormatting, studio, displayDate),
                                     score=score,
                                     lang=lang))

        # Split Scenes
        scenes = detailsPageElements.xpath(
            '//div[@class="product-details-container"]/div[@class="container"]/div[@class="row"]'
        )
        sceneCount = (len(scenes) - 1) / 2
        for sceneNum in range(0, sceneCount):
            actorNames = ', '.join(
                detailsPageElements.xpath(
                    '//div[@class="container"]/div[@class="row"][./div[@class="col-sm-6 text-right text-left-xs m-b-1"]][%d]/div[2]/div/a/text()'
                    % (sceneNum + 1)))
            if score == 80:
                count += 1
                temp.append(
                    MetadataSearchResult(
                        id='%s|%d|%s|%d' %
                        (curID, siteNum, releaseDate, sceneNum + 1),
                        name='%s/#%d[%s][%s] %s' %
                        (titleNoFormatting, sceneNum + 1, actorNames, studio,
                         displayDate),
                        score=score,
                        lang=lang))
            else:
                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s|%d' %
                        (curID, siteNum, releaseDate, sceneNum + 1),
                        name='%s/#%d[%s][%s] %s' %
                        (titleNoFormatting, sceneNum + 1, actorNames, studio,
                         displayDate),
                        score=score,
                        lang=lang))

    # Flush buffered score-80 results, demoting ties to 79
    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=79,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=result.score,
                                     lang=lang))

    return results
Пример #29
0
def update(metadata, siteID, movieGenres, movieActors):
    """Fill in Plex metadata for a Mylf-network scene.

    metadata.id is '<sceneName>|<siteID>|<releaseDate>|<contentName>';
    scene details come from the site's embedded JSON blob.  Returns the
    updated metadata object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneName = metadata_id[0]
    releaseDate = metadata_id[2]
    contentName = metadata_id[3]

    detailsPageElements = getJSONfromPage(
        PAsearchSites.getSearchSearchURL(siteID) +
        sceneName)[contentName][sceneName]

    # Studio
    metadata.studio = 'Mylf'

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    metadata.summary = detailsPageElements['description']

    # Tagline and Collection(s): prefer the sub-site name when present
    metadata.collections.clear()
    if 'site' in detailsPageElements:
        subSite = detailsPageElements['site']['name']
    else:
        subSite = PAsearchSites.getSearchSiteName(siteID)
    metadata.tagline = subSite
    metadata.collections.add(subSite)

    # Release Date
    if releaseDate:
        date_object = parse(releaseDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors: the photo comes from each model's own JSON page
    movieActors.clearActors()
    actors = detailsPageElements['models']
    for actorLink in actors:
        actorID = actorLink['modelId']
        actorName = actorLink['modelName']
        actorPhotoURL = ''

        actorData = getJSONfromPage(
            '%s/models/%s' % (PAsearchSites.getSearchBaseURL(siteID), actorID))
        if actorData:
            actorPhotoURL = actorData['modelsContent'][actorID]['img']

        movieActors.addActor(actorName, actorPhotoURL)

    # Genres: base genres plus per-sub-site defaults
    movieGenres.clearGenres()
    genres = ["MILF", "Mature"]

    if subSite.lower() == "MylfBoss".lower():
        for genreName in ['Office', 'Boss']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "MylfBlows".lower():
        for genreName in ['B*****b']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Milfty".lower():
        for genreName in ['Cheating']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Mom Drips".lower():
        for genreName in ['Creampie']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Milf Body".lower():
        for genreName in ['Gym', 'Fitness']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Lone Milf".lower():
        for genreName in ['Solo']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Full Of JOI".lower():
        for genreName in ['JOI']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "Mylfed".lower():
        for genreName in ['Lesbian', 'Girl on Girl', 'GG']:
            movieGenres.addGenre(genreName)
    elif subSite.lower() == "MylfDom".lower():
        for genreName in ['BDSM']:
            movieGenres.addGenre(genreName)
    # Case-insensitive, matching the sub-site comparisons above
    if (len(actors) > 1) and subSite.lower() != "Mylfed".lower():
        genres.append("Threesome")

    for genre in genres:
        movieGenres.addGenre(genre)

    # Posters
    art = [detailsPageElements['img']]

    Log('Artwork found: %d' % len(art))
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis.  The previous code built a
            # urllib.Request, but Python 2's urllib module has no Request
            # class (that API lives in urllib2), so every download raised
            # and was swallowed by the bare except.  Fetch once with
            # HTTP.Request and reuse the bytes for the proxy items.
            try:
                imageContent = HTTP.Request(posterUrl,
                                            headers=headers).content
                im = StringIO(imageContent)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(imageContent,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(imageContent,
                                                          sort_order=idx)
            except:
                pass

    return metadata
Пример #30
0
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate Plex metadata for a HighTechVR scene.

    metadata.id is expected to look like 'scenePath|...'; the first
    '|'-segment is appended to the site's search URL to locate the scene
    page.  Fills title, summary, studio, tagline/collection, release
    date, genres, actors and artwork, then returns the metadata object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAsearchSites.getSearchSearchURL(siteNum) + metadata_id[0]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//h1')[0].text_content().strip()

    # Summary (the alternation covers the several page layouts the site uses)
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="video-group-bottom"]/p | //p[@class="u-lh--opt"] | //div[@class="video-info"]/p | //div[@class="desc"]'
    )[0].text_content().strip()

    # Studio
    metadata.studio = 'HighTechVR'

    # Tagline and Collection — taken from the '<scene> | <site>' page <title>
    metadata.collections.clear()
    tagline = detailsPageElements.xpath('//title')[0].text_content().split(
        '|')[1].strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = detailsPageElements.xpath(
        '//span[@class="date-display-single"] | //span[@class="u-inline-block u-mr--nine"] | //div[@class="video-meta-date"] | //div[@class="date"]'
    )[0].text_content().strip()
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//div[contains(@class, "video-tags")]//a | //div[@class="tags"]//a'
    ):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Actors — each actor link is followed to their page to grab a headshot
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath(
            '//div[@class="video-actress-name"]//a | //div[@class="u-mt--three u-mb--three"]//a | //div[@class="model-one-inner js-trigger-lazy-item"]//a | //div[@class="featuring commed"]//a'
    ):
        actorName = actorLink.text_content().strip()

        actorPageURL = PAsearchSites.getSearchBaseURL(siteNum) + actorLink.get(
            'href')
        req = PAutils.HTTPRequest(actorPageURL)
        actorPage = HTML.ElementFromString(req.text)
        # Strip any '?...' cache-buster query string from the photo URL
        actorPhotoURL = actorPage.xpath(
            '//div[contains(@class, "model-img-wrapper")]/figure/a/img | //div[contains(@class, "u-ratio--model-poster")]//img | //div[contains(@class, "model-one-inner")]//img | //div[contains(@class, "row actor-info")]//img'
        )[0].get('src').split('?')[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []

    for poster in detailsPageElements.xpath(
            '//div[contains(@class,"video-gallery")]//div//figure//a | //a[@class="u-block u-ratio u-ratio--lightbox u-bgc--back-opt u-z--zero"] | //div[@class="scene-previews-container"]//a'
    ):
        img = poster.get('href').split('?')[0]
        if img.startswith('http'):
            art.append(img)

    # The video splash screen carries its image as an inline CSS
    # background: 'url(...)'.  Guard the lookup: not every page layout has
    # this element, and an unguarded [0] would raise IndexError and abort
    # the whole update after the metadata above was already collected.
    splash = detailsPageElements.xpath(
        '//div[@class="splash-screen fullscreen-message is-visible"]/@style')
    if splash:
        img = splash[0].split('url(')[1].split(')')[0]
        art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis; artwork is best-effort, so
            # any single failed image is skipped rather than failing the
            # update.  Catch Exception (not bare except) so process-level
            # signals like KeyboardInterrupt/SystemExit still propagate.
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except Exception:
                pass

    return metadata