Пример #1
0
    def matchRatioWithCaption(self, caption, quickMatch=False):
        """Return a similarity ratio between this caption's text and ``caption``'s.

        A cheap ``difflib.SequenceMatcher`` ratio is computed first. Only when
        that ratio lies strictly between 0.02 and 0.92 is the
        'Caption_Compare' cache consulted (under both orderings of the two
        text hashes); on a cache miss with ``quickMatch`` false,
        ``string_match.matchRatio`` is run and its result is cached.

        The comparison texts are lazily computed with
        ``Caption._textForComparison`` and stored on both objects in
        ``textCompare``.

        Args:
            caption: The caption to compare against; must be truthy.
            quickMatch: When true, ``string_match.matchRatio`` is never run;
                the difflib ratio or a cached result is returned instead.

        Returns:
            The match ratio. 0.0 when either comparison text is empty or when
            the comparison fails before any ratio has been computed.

        Raises:
            AssertionError: if ``caption`` is falsy (with assertions enabled).
        """
        assert caption

        matchRatio = 0.0

        try:
            if self.textCompare is None:
                self.textCompare = Caption._textForComparison(self.text)
            textA = self.textCompare

            if caption.textCompare is None:
                caption.textCompare = Caption._textForComparison(caption.text)
            textB = caption.textCompare

            # Explicit emptiness check rather than `assert`: asserts vanish
            # under -O, and difflib reports 1.0 for two empty strings.
            if len(textA) > 0 and len(textB) > 0:
                matchRatio = difflib.SequenceMatcher(None, textA, textB).ratio()

                if 0.02 < matchRatio < 0.92:
                    # Build two keys from the hashes of each string, one per
                    # ordering, and check the cache before doing a full match.
                    digests = []
                    for text in (textA, textB):
                        # md5 needs bytes; encode text explicitly so unicode
                        # input hashes instead of raising.
                        if not isinstance(text, bytes):
                            text = text.encode('utf-8')
                        digests.append(hashlib.md5(text).hexdigest())

                    keyA = digests[0] + '_' + digests[1]
                    keyB = digests[1] + '_' + digests[0]

                    resultA = objectcache.searchCache('Caption_Compare', keyA)
                    resultB = objectcache.searchCache('Caption_Compare', keyB)

                    if resultA is not None:
                        logging.debug('Found cached result(a): ' + str(resultA))
                        matchRatio = resultA

                    elif resultB is not None:
                        matchRatio = resultB
                        logging.debug('Found cached result(b): ' + str(resultB))

                    elif not quickMatch:
                        logging.info('Performing match compare. Please wait')
                        matchRatio = string_match.matchRatio(textA, textB)
                        logging.debug('Got compare result: ' + str(matchRatio))
                        objectcache.saveObject('Caption_Compare', keyA, matchRatio)

        except Exception:
            # Best effort: keep whatever ratio was reached, but leave a trace
            # instead of swallowing the failure silently.
            logging.exception('Failed to compute caption match ratio')

        logging.debug('Match ratio: ' + str(matchRatio))

        return matchRatio
Пример #2
0
    def matchRatioWithCaption(self, caption, quickMatch=False):
        """Compare this caption's text with another caption's and return a ratio.

        The quick ``difflib.SequenceMatcher`` ratio is always computed. If it
        is strictly between 0.02 and 0.92, the 'Caption_Compare' cache is
        searched with keys built from the md5 hashes of both texts (in both
        orders). When nothing is cached and ``quickMatch`` is false, the
        result of ``string_match.matchRatio`` is used and saved to the cache.

        ``textCompare`` is filled in on ``self`` and ``caption`` via
        ``Caption._textForComparison`` when it is still ``None``.

        Args:
            caption: Caption to compare with; must be truthy.
            quickMatch: Skip the ``string_match.matchRatio`` call when true.

        Returns:
            The resulting ratio, or 0.0 if a comparison text is empty or an
            error occurs before a ratio is available.

        Raises:
            AssertionError: if ``caption`` is falsy (with assertions enabled).
        """
        assert caption

        matchRatio = 0.0

        try:
            if self.textCompare is None:
                self.textCompare = Caption._textForComparison(self.text)
            textA = self.textCompare

            if caption.textCompare is None:
                caption.textCompare = Caption._textForComparison(caption.text)
            textB = caption.textCompare

            # Checked explicitly (not with assert) so the guard survives -O;
            # difflib would otherwise rate two empty strings as 1.0.
            if len(textA) > 0 and len(textB) > 0:
                matchRatio = difflib.SequenceMatcher(None, textA, textB).ratio()

                if 0.02 < matchRatio < 0.92:
                    # Construct 2 keys with the hashes of each string in each
                    # order. Check the cache before performing a match.
                    hexDigests = []
                    for text in (textA, textB):
                        # hashlib only accepts bytes; encode text first.
                        if not isinstance(text, bytes):
                            text = text.encode('utf-8')
                        hexDigests.append(hashlib.md5(text).hexdigest())

                    keyA = hexDigests[0] + '_' + hexDigests[1]
                    keyB = hexDigests[1] + '_' + hexDigests[0]

                    resultA = objectcache.searchCache('Caption_Compare', keyA)
                    resultB = objectcache.searchCache('Caption_Compare', keyB)

                    if resultA is not None:
                        logging.debug('Found cached result(a): ' + str(resultA))
                        matchRatio = resultA

                    elif resultB is not None:
                        matchRatio = resultB
                        logging.debug('Found cached result(b): ' + str(resultB))

                    elif not quickMatch:
                        logging.info('Performing match compare. Please wait')
                        matchRatio = string_match.matchRatio(textA, textB)
                        logging.debug('Got compare result: ' + str(matchRatio))
                        objectcache.saveObject('Caption_Compare', keyA, matchRatio)

        except Exception:
            # Still best effort (the ratio reached so far is returned), but
            # the failure is now logged rather than discarded.
            logging.exception('Failed to compute caption match ratio')

        logging.debug('Match ratio: ' + str(matchRatio))

        return matchRatio
Пример #3
0
    def findTVEpisodesForShow(self, mediaObject):
        """Collect the episodes of every season of ``mediaObject``.

        The 'MediaScraper_TVShow' cache is checked first under the show's
        ``unique_id``. On a miss, seasons are requested from
        ``self.api.findTVEpisodesForSeason`` one at a time, starting at
        season 1, until a season comes back empty or the request raises. A
        non-empty combined list is then written back to the cache.

        Args:
            mediaObject: Show object; its ``unique_id`` is the cache key and
                the object itself is handed to the API.

        Returns:
            The cached or freshly collected list of episodes (possibly empty).
        """
        import time  # local import, hoisted out of the request loop

        searchKey = mediaObject.unique_id

        results = objectcache.searchCache('MediaScraper_TVShow', searchKey)

        if results is None or len(results) == 0:
            results = []
            seasonNumber = 1
            keepSearching = True

            while keepSearching:
                try:
                    resultsAppend = self.api.findTVEpisodesForSeason(
                        mediaObject, seasonNumber)
                except Exception as e:
                    logging.error('Failed to get episodes for ' +
                                  str(mediaObject) + ' season ' +
                                  str(seasonNumber) + ' error: ' + str(e))
                    resultsAppend = []

                # `not` also covers a None answer, which len() would reject.
                if not resultsAppend:
                    keepSearching = False
                else:
                    results += resultsAppend
                    seasonNumber += 1

                # Pause after every request -- presumably to throttle API
                # usage (NOTE(review): confirm the intended rate limit).
                time.sleep(5)

            if len(results) > 0:
                objectcache.saveObject('MediaScraper_TVShow', searchKey,
                                       results)

        # The original fell off the end and implicitly returned None.
        return results
Пример #4
0
    def subtitlesForTVEpisode(self, episodeObject, language="eng"):
        """Return the subtitles for a TV episode, going through the cache.

        The cache key combines the episode's ``unique_id``, the language and
        the episode's ``scraper_source``. An absent or empty cache entry
        triggers a lookup through ``self._api().subtitlesForMovie``; whatever
        that returns is stored under the same key and handed back.
        """
        cacheName = "SubtitleScraper_TVEpisode"
        keyParts = (
            str(episodeObject.unique_id),
            language,
            str(episodeObject.scraper_source),
        )
        key = "_".join(keyParts)

        cached = objectcache.searchCache(cacheName, key)
        if cached is not None and len(cached) != 0:
            return cached

        # The literal 5 is passed through unchanged; its meaning is defined by
        # subtitlesForMovie (NOTE(review): confirm what it controls).
        fetched = self._api().subtitlesForMovie(episodeObject, 5, language)
        objectcache.saveObject(cacheName, key, fetched)
        return fetched
Пример #5
0
    def subtitlesForTVEpisode(self, episodeObject, language='eng'):
        """Look up subtitles for a TV episode, using the object cache first.

        Results are cached in 'SubtitleScraper_TVEpisode' under a key made of
        the episode's ``unique_id``, the language and its ``scraper_source``.
        When the cache holds nothing (or an empty value) the subtitles are
        requested via ``self._api().subtitlesForMovie`` and saved.
        """
        cacheName = 'SubtitleScraper_TVEpisode'
        episodeId = str(episodeObject.unique_id)
        source = str(episodeObject.scraper_source)
        key = '_'.join([episodeId, language, source])

        subtitles = objectcache.searchCache(cacheName, key)
        haveCached = subtitles is not None and len(subtitles) != 0

        if not haveCached:
            # The 5 is forwarded as-is to subtitlesForMovie
            # (NOTE(review): confirm what this argument means).
            subtitles = self._api().subtitlesForMovie(episodeObject, 5,
                                                      language)
            objectcache.saveObject(cacheName, key, subtitles)

        return subtitles
Пример #6
0
    def findTVEpisode(self, mediaObject, seasonNumber, episodeNumber):
        """Return one episode of a show, consulting the object cache first.

        The cache key is the show's ``title`` followed by ``_S<season>`` and
        ``_E<episode>``. If the 'MediaScraper_TVEpisode' cache has no
        (or an empty) entry, ``self.api.findTVEpisode`` is called and its
        result is saved under that key before being returned.
        """
        cacheName = "MediaScraper_TVEpisode"
        seasonTag = "_S" + str(seasonNumber)
        episodeTag = "_E" + str(episodeNumber)
        searchKey = mediaObject.title + seasonTag + episodeTag

        cached = objectcache.searchCache(cacheName, searchKey)
        if cached is not None and len(cached) != 0:
            return cached

        fetched = self.api.findTVEpisode(mediaObject, seasonNumber, episodeNumber)
        objectcache.saveObject(cacheName, searchKey, fetched)
        return fetched
Пример #7
0
    def findTVEpisode(self, mediaObject, seasonNumber, episodeNumber):
        """Fetch a single episode, preferring a cached answer.

        Looks in the 'MediaScraper_TVEpisode' cache under
        ``<title>_S<season>_E<episode>``. A missing or empty entry is
        replaced by the result of ``self.api.findTVEpisode``, which is also
        written to the cache.
        """
        cacheName = 'MediaScraper_TVEpisode'
        suffix = '_S' + str(seasonNumber) + '_E' + str(episodeNumber)
        searchKey = mediaObject.title + suffix

        episode = objectcache.searchCache(cacheName, searchKey)
        cacheMiss = episode is None or len(episode) == 0

        if cacheMiss:
            episode = self.api.findTVEpisode(mediaObject, seasonNumber,
                                             episodeNumber)
            objectcache.saveObject(cacheName, searchKey, episode)

        return episode
Пример #8
0
    def findTVEpisodesForSeason(self, mediaObject, seasonNumber):
        """Return the episodes of one season, ordered by episode number.

        The 'MediaScraper_TVSeason' cache is searched under
        ``<title>_S<season>``. When it holds nothing (or an empty value) the
        episodes come from ``self.api.findTVEpisodesForSeason`` and are
        saved to the cache. A non-empty result is sorted in place by the
        numeric value of each item's ``episode_number`` before being returned.
        """
        cacheName = "MediaScraper_TVSeason"
        searchKey = mediaObject.title + ("_S" + str(seasonNumber))

        episodes = objectcache.searchCache(cacheName, searchKey)
        if episodes is None or len(episodes) == 0:
            episodes = self.api.findTVEpisodesForSeason(mediaObject, seasonNumber)
            objectcache.saveObject(cacheName, searchKey, episodes)

        if not episodes:
            return episodes

        def episodeOrder(episode):
            return float(episode.episode_number)

        episodes.sort(key=episodeOrder)
        return episodes
Пример #9
0
    def findTVEpisodesForSeason(self, mediaObject, seasonNumber):
        """Look up a season's episodes (cache first) sorted by episode number.

        Cache entries live in 'MediaScraper_TVSeason' under
        ``<title>_S<season>``. A missing or empty entry is refreshed from
        ``self.api.findTVEpisodesForSeason`` and stored. Any non-empty result
        is sorted in place using ``float(episode_number)`` as the key.
        """
        cacheName = 'MediaScraper_TVSeason'
        seasonTag = '_S' + str(seasonNumber)
        searchKey = mediaObject.title + seasonTag

        seasonEpisodes = objectcache.searchCache(cacheName, searchKey)
        needsFetch = seasonEpisodes is None or len(seasonEpisodes) == 0

        if needsFetch:
            seasonEpisodes = self.api.findTVEpisodesForSeason(mediaObject,
                                                              seasonNumber)
            objectcache.saveObject(cacheName, searchKey, seasonEpisodes)

        if seasonEpisodes:
            def numericEpisode(item):
                return float(item.episode_number)

            seasonEpisodes.sort(key=numericEpisode)

        return seasonEpisodes
Пример #10
0
    def findTVShow(self, tvshow, year=None):
        """Search for TV shows matching a (possibly decorated) name.

        The name is cleaned before searching: it is stripped, passed through
        ``MediaScraper._removeSeasonFromName``, a disc marker (``_``/space,
        then ``d``/``disc``/``disk``, an optional separator and one or two
        digits) is removed, and a trailing ``-`` is dropped. When ``year`` is
        not supplied it is taken from the name via
        ``MediaScraper._extractYearFromName`` and the name is passed through
        ``MediaScraper._removeYearFromName``.

        The 'MediaScraper_TVShow' cache is consulted under the cleaned name;
        on a miss ``self.api.findTVShow`` is queried and the answer cached.
        If there are no results and exactly one acronym candidate exists for
        the name, the API is queried once more with that acronym.

        Args:
            tvshow: Raw show name.
            year: Optional year passed to the API; derived from the name when
                ``None``.

        Returns:
            The matching shows, highest ``popularity`` first.
        """
        tvshow = tvshow.strip()

        seasonNumber = MediaScraper._extractSeasonNumberFromName(tvshow)

        tvshow = MediaScraper._removeSeasonFromName(tvshow)

        # Look for a ' '/'_' followed by 'd'/'disc'/'disk' followed by a
        # number and remove it.
        tvshow = re.sub(r'(?i)[_ ](d|disc|disk)[_| ]?\d{1,2}', '', tvshow)

        tvshow = tvshow.strip()

        # endswith() rather than tvshow[-1]: the cleaned name may be empty
        # here, and indexing an empty string raises IndexError.
        if tvshow.endswith('-'):
            tvshow = tvshow[:-1].strip()

        if year is None:
            year = MediaScraper._extractYearFromName(tvshow)
            tvshow = MediaScraper._removeYearFromName(tvshow)

        results = objectcache.searchCache('MediaScraper_TVShow', tvshow)

        if results is None:
            # Treat a None answer from the API as "no results" so the length
            # check below cannot fail.
            results = self.api.findTVShow(tvshow, seasonNumber, year) or []
            objectcache.saveObject('MediaScraper_TVShow', tvshow, results)

        if len(results) == 0:
            logging.info('No results found for ' + tvshow +
                         ', searching for acronyms')

            acronyms = MediaScraper._acronymsFromNameWithType(tvshow, 'tvshow')

            logging.debug('Found acronyms: ' + str(acronyms))

            if len(acronyms) == 1:
                results = self.api.findTVShow(acronyms[0], seasonNumber, year)

        if results:
            # Sort by most popular. Ascending sort plus reverse() is kept (not
            # reverse=True) so the order of equal-popularity entries is
            # unchanged.
            results.sort(key=lambda x: float(x.popularity))
            results.reverse()

        logging.debug('Returning TV shows: ' + str(results))

        return results
Пример #11
0
    def findTVShow(self, tvshow, year=None):
        """Find TV shows for a raw name, using the cache and an acronym fallback.

        Cleaning steps applied to ``tvshow`` before the search: strip
        whitespace, run ``MediaScraper._removeSeasonFromName``, remove a disc
        marker (``_``/space + ``d``/``disc``/``disk`` + optional separator +
        one or two digits), and drop a trailing ``-``. If ``year`` is
        ``None`` it is obtained from ``MediaScraper._extractYearFromName``
        and the name goes through ``MediaScraper._removeYearFromName``.

        The cleaned name is the key into the "MediaScraper_TVShow" cache; a
        miss is answered by ``self.api.findTVShow`` and cached. With no
        results and exactly one acronym candidate, the API is asked again
        using that acronym.

        Args:
            tvshow: Raw show name.
            year: Optional year for the API query; derived from the name when
                ``None``.

        Returns:
            The matching shows, ordered from highest to lowest ``popularity``.
        """
        tvshow = tvshow.strip()

        seasonNumber = MediaScraper._extractSeasonNumberFromName(tvshow)

        tvshow = MediaScraper._removeSeasonFromName(tvshow)

        # look for a ' '/'_' followed by 'd'/'disc'/'disk' followed by a number and remove
        tvshow = re.sub(r"(?i)[_ ](d|disc|disk)[_| ]?\d{1,2}", "", tvshow)

        tvshow = tvshow.strip()

        # tvshow can be empty by now; endswith() is safe where tvshow[-1]
        # would raise IndexError.
        if tvshow.endswith("-"):
            tvshow = tvshow[:-1].strip()

        if year is None:
            year = MediaScraper._extractYearFromName(tvshow)
            tvshow = MediaScraper._removeYearFromName(tvshow)

        results = objectcache.searchCache("MediaScraper_TVShow", tvshow)

        if results is None:
            # A None answer from the API is normalised to an empty list so
            # len() below cannot raise TypeError.
            results = self.api.findTVShow(tvshow, seasonNumber, year) or []
            objectcache.saveObject("MediaScraper_TVShow", tvshow, results)

        if len(results) == 0:
            logging.info("No results found for " + tvshow + ", searching for acronyms")

            acronyms = MediaScraper._acronymsFromNameWithType(tvshow, "tvshow")

            logging.debug("Found acronyms: " + str(acronyms))

            if len(acronyms) == 1:
                results = self.api.findTVShow(acronyms[0], seasonNumber, year)

        if results:
            # Sort by most popular; sort() + reverse() is kept as-is so ties
            # come out in the same order as before.
            results.sort(key=lambda x: float(x.popularity))
            results.reverse()

        logging.debug("Returning TV shows: " + str(results))

        return results
Пример #12
0
    def findMovie(self, movie, year=None):
        """Search for movies by name, with caching and an acronym fallback.

        When ``year`` is ``None`` it is obtained from
        ``MediaScraper._extractYearFromName`` and the name is passed through
        ``MediaScraper._removeYearFromName``. On a 'MediaScraper_Movie' cache
        miss, every candidate from ``MediaScraper._searchCandidatesFromName``
        is sent to ``self.api.findMovie`` and the combined hits are cached.
        If nothing was found and the name yields exactly one acronym, that
        acronym is searched instead. Non-empty results are returned ordered
        from highest to lowest ``popularity``.
        """
        movie = movie.strip()

        if year is None:
            year = MediaScraper._extractYearFromName(movie)
            movie = MediaScraper._removeYearFromName(movie)

        cacheName = 'MediaScraper_Movie'
        results = objectcache.searchCache(cacheName, movie)

        if results is None:
            results = []
            for candidate in MediaScraper._searchCandidatesFromName(movie):
                found = self.api.findMovie(candidate, year)
                if found:
                    results.extend(found)
            objectcache.saveObject(cacheName, movie, results)

        if not len(results):
            logging.debug('No results found for ' + movie +
                          ', searching for acronyms')
            acronyms = MediaScraper._acronymsFromNameWithType(movie, 'movie')
            logging.debug('Found acronyms: ' + str(acronyms))

            if len(acronyms) == 1:
                onlyAcronym = acronyms[0]
                results = self.api.findMovie(onlyAcronym, year)

        if results:
            # Most popular first: ascending sort, then in-place reversal.
            def popularityOf(entry):
                return float(entry.popularity)

            results.sort(key=popularityOf)
            results.reverse()

        logging.debug('Returning Movies: ' + str(results))
        return results
Пример #13
0
    def findMovie(self, movie, year=None):
        """Return movies matching ``movie``, most popular first.

        The name is stripped; if no ``year`` is given, one is taken from the
        name via ``MediaScraper._extractYearFromName`` and the name goes
        through ``MediaScraper._removeYearFromName``. The "MediaScraper_Movie"
        cache is checked under the resulting name. On a miss, each candidate
        from ``MediaScraper._searchCandidatesFromName`` is looked up with
        ``self.api.findMovie`` and the accumulated list is cached. An empty
        list leads to one more lookup using the name's acronym, provided
        exactly one acronym exists.
        """
        movie = movie.strip()

        if year is None:
            year = MediaScraper._extractYearFromName(movie)
            movie = MediaScraper._removeYearFromName(movie)

        cacheName = "MediaScraper_Movie"
        results = objectcache.searchCache(cacheName, movie)

        if results is None:
            results = []
            for searchTerm in MediaScraper._searchCandidatesFromName(movie):
                hits = self.api.findMovie(searchTerm, year)
                if hits:
                    results.extend(hits)
            objectcache.saveObject(cacheName, movie, results)

        nothingFound = len(results) == 0
        if nothingFound:
            logging.debug("No results found for " + movie + ", searching for acronyms")
            acronyms = MediaScraper._acronymsFromNameWithType(movie, "movie")
            logging.debug("Found acronyms: " + str(acronyms))

            if len(acronyms) == 1:
                soleAcronym = acronyms[0]
                results = self.api.findMovie(soleAcronym, year)

        if results:
            # Sort ascending by popularity, then reverse so the most popular
            # entry comes first.
            def byPopularity(item):
                return float(item.popularity)

            results.sort(key=byPopularity)
            results.reverse()

        logging.debug("Returning Movies: " + str(results))
        return results
Пример #14
0
    def findTVEpisodesForShow(self, mediaObject):
        """Gather the episodes of all seasons of ``mediaObject``.

        Looks in the "MediaScraper_TVShow" cache under the show's
        ``unique_id`` first. If nothing (or an empty value) is cached,
        ``self.api.findTVEpisodesForSeason`` is called for season 1, 2, ...
        until a season yields no episodes or the call raises; the combined
        list is saved to the cache when it is non-empty.

        Args:
            mediaObject: Show object; ``unique_id`` is used as the cache key
                and the object is forwarded to the API.

        Returns:
            The cached or freshly gathered list of episodes (possibly empty).
        """
        import time  # imported once here instead of on every loop iteration

        searchKey = mediaObject.unique_id

        results = objectcache.searchCache("MediaScraper_TVShow", searchKey)

        if results is None or len(results) == 0:
            results = []
            seasonNumber = 1
            keepSearching = True

            while keepSearching:
                try:
                    resultsAppend = self.api.findTVEpisodesForSeason(mediaObject, seasonNumber)
                except Exception as e:
                    logging.error(
                        "Failed to get episodes for "
                        + str(mediaObject)
                        + " season "
                        + str(seasonNumber)
                        + " error: "
                        + str(e)
                    )
                    resultsAppend = []

                # Truthiness test so a None answer ends the loop instead of
                # crashing in len().
                if not resultsAppend:
                    keepSearching = False
                else:
                    results += resultsAppend
                    seasonNumber += 1

                # Wait after each request -- presumably API throttling
                # (NOTE(review): confirm the intended rate limit).
                time.sleep(5)

            if len(results) > 0:
                objectcache.saveObject("MediaScraper_TVShow", searchKey, results)

        # Previously the collected episodes were never returned to the caller.
        return results