示例#1
0
    def _fetch_metadata(self):
        """Resolve this movie on IMDB, store its metadata and write the NFO.

        When ``self._md`` has no ``"id"`` yet, IMDB is searched by title,
        asking interactively for another title when nothing is found and for
        a choice when several candidates remain.  Title, year, plot and
        genres of the resolved entry are then stored in ``self._md`` and
        ``self._write_nfo()`` is called.
        """

        def search_features(client, query):
            # Only feature films that carry a release year are candidates.
            return [
                r for r in client.search_for_title(query)
                if r["type"] == "feature" and r["year"] is not None
            ]

        searchtitle = self._title
        # Titles stored in sort order ("Matrix, The") are put back into
        # natural order before searching.
        if searchtitle.endswith(", The"):
            searchtitle = "The {}".format(searchtitle[:-5])
        elif searchtitle.endswith(", A"):
            searchtitle = "A {}".format(searchtitle[:-3])

        imdb = Imdb()
        if "id" not in self._md:
            print("  * Searching IMDB")
            movie_results = search_features(imdb, searchtitle)

            # Keep asking until a search yields at least one candidate.
            while not movie_results:
                searchtitle = input("No results for \"%s\" Enter alternate/correct movie title >> " % searchtitle)
                movie_results = search_features(imdb, searchtitle)

            # Prefer case-insensitive exact title matches when there are any.
            exact_matches = [r for r in movie_results if r["title"].lower() == searchtitle.lower()]
            if exact_matches:
                movie_results = exact_matches

            if len(movie_results) > 1:
                choices = [("%s (%s)" % (r["title"], r["year"]), idx) for idx, r in enumerate(movie_results)]
                choices.append(("Not found", -1))
                answer = inquirer.prompt([
                    inquirer.List("index",
                        message="Multiple results found:",
                        choices=choices
                    )
                ])
                if answer["index"] == -1:
                    # None of the candidates fits: take the id from the user.
                    self._md["id"] = input("Enter IMDB id: ")
                    movie_results = []
                else:
                    movie_results = [movie_results[answer["index"]]]

            if movie_results:
                mpr = movie_results[0]
                self._md["id"] = mpr["imdb_id"]
                print("  * Fetching data for {} ({})".format(mpr["title"], mpr["year"]))
        else:
            print("  * Fetching data for %s" % self._md["id"])

        imdb_movie = imdb.get_title(self._md["id"])

        self._md["title"] = imdb_movie["base"]["title"]
        self._md["year"] = imdb_movie["base"]["year"]

        # Prefer the outline, fall back to the first summary, and use an
        # empty plot instead of crashing when the entry has neither.
        plot = imdb_movie.get("plot") or {}
        if "outline" in plot:
            self._md["plot"] = plot["outline"]["text"]
        else:
            summaries = plot.get("summaries") or []
            self._md["plot"] = summaries[0]["text"] if summaries else ""

        self._md["genres"] = imdb.get_title_genres(self._md["id"])["genres"]

        self._write_nfo()
示例#2
0
def IMDBSearch(request):
    """Render the IMDb search page for the ``search`` query parameter.

    A non-empty ``search`` parameter is looked up with
    ``Imdb.search_for_title``; otherwise an empty ``Media`` queryset is
    shown.  The results are wrapped in an ``IMDBSearchTable`` and rendered
    with the ``pages/IMDBsearch.html`` template.
    """
    # ``.get`` with a default covers both a missing and an empty parameter.
    # The previous ``is not ''`` test compared object identity, not value.
    search_term = request.GET.get('search', '')

    if search_term:
        results = Imdb().search_for_title(search_term)
    else:
        results = Media.objects.none()

    # NOTE: fetching the detailed record of every hit (get_title per
    # imdb_id) was tried here and found to be really slow.

    table = IMDBSearchTable(results)
    RequestConfig(request).configure(table)

    return render(request, 'pages/IMDBsearch.html',
                  {
                      'search_term': search_term,
                      'table': table,
                  }
                  )
示例#3
0
def main():
    """Dump IMDB episode records for the first ten eztv shows to a CSV file.

    The show names are scraped from ``EZTV_URL``; for each one the first
    search hit whose episodes can be fetched is used.  All episode objects
    are flattened to their attribute dicts and written to ``DATA_PATH``.
    """
    print("! Getting a list of TV shows from eztv...")
    listing_html = requests.get(EZTV_URL).content
    showlist_page = lxml.html.fromstring(listing_html)
    shows = [link.text for link in showlist_page.xpath('//a[@class="thread_link"]')]
    print("")
    imdb = Imdb()
    episode_records = []
    for show_name in shows[:10]:
        print("* Processing `{}`...".format(show_name))
        episodes = None
        for candidate in imdb.search_for_title(show_name):
            try:
                episodes = imdb.get_episodes(candidate['imdb_id'])
            except (RuntimeError, TypeError):
                # RuntimeError: the hit is not recognized as a series.
                # TypeError: bug where seasons is None.
                continue
            else:
                break
        if episodes is None:
            print("  ! Couldn't find an IMDB entry for `{}`. Ignoring.".format(
                show_name))
            continue
        episode_records.extend(vars(episode) for episode in episodes)
    pd.DataFrame(episode_records).to_csv(DATA_PATH, index=False)
示例#4
0
def imdb_rating(movieTitle, year=None):
    """Return the IMDb rating of the best search hit for *movieTitle*.

    Args:
        movieTitle: Title to search for.
        year: Optional release year; when given, the first hit whose year is
            within two years of it is used, otherwise the first hit.

    Returns:
        The rating as a float, or ``0.00`` (after printing a warning) when
        the search fails, nothing matches, the hit is neither a feature film
        nor a documentary, or it has no rating.
    """
    imdb = Imdb()
    try:
        results = imdb.search_for_title(movieTitle)
    except Exception:
        print('WARNING: Could not find the title %s' % movieTitle)
        return 0.00

    bestHit = None
    if year is None:
        # An empty result list used to raise IndexError here.
        if results:
            bestHit = results[0]
    else:
        for result in results or []:
            try:
                movieYear = int(result.get('year'))
            except (TypeError, ValueError):
                # Hits without a usable year cannot be matched against one.
                continue
            if movieYear - 2 <= year <= movieYear + 2:
                bestHit = result
                break
    if bestHit is None:
        print('WARNING: Could not get match for %s' % movieTitle)
        return 0.00

    print('-=MATCH=- %s --from %s  --=--  %s' % (movieTitle, year, bestHit))
    # Fetch the title once; rating and type come from the same record.
    title = imdb.get_title_by_id(bestHit.get('imdb_id'))
    rating = title.rating
    movType = (title.type or '').lower()
    if movType != 'feature' and movType != 'documentary':
        print('WARNING: This is not a feature-film or docu: %s' % movieTitle)
        return 0.00
    if rating is None:
        print('WARNING: Could not get rating from title %s' %
              bestHit.get('title'))
        return 0.00
    return float(rating)
示例#5
0
def test(title):
    """Return a short text summary of the first IMDb hit for *title*.

    The summary holds the title with its year, the plot outline and the
    rating on separate lines.  ``'Movie not found.'`` is returned when the
    search has no first result.
    """
    client = Imdb()

    try:
        first_hit = client.search_for_title(title)[0]
    except IndexError:
        return 'Movie not found.'

    details = client.get_title_by_id(first_hit['imdb_id'])
    summary_lines = [
        '{} ({})'.format(details.title, details.year),
        '{}'.format(details.plot_outline),
        'IMDB: {}'.format(details.rating),
    ]
    return '\n'.join(summary_lines)
示例#6
0
文件: imdb.py 项目: Rochan-A/CineLog
def metadata(s):
    """Return the imdbpie title object of the first search hit for *s*.

    Raises:
        IndexError: If the search returns no results.
    """
    # A single client is enough; a plain Imdb() used to be built first and
    # thrown away immediately.
    imdb = Imdb(anonymize=True)  # to proxy requests
    names = imdb.search_for_title(s)
    title = imdb.get_title_by_id(names[0][u'imdb_id'])
    # The result of cast_rating was never used; the call itself is kept in
    # case it has side effects -- TODO confirm and drop if it has none.
    cast_rating(title.cast_summary)
    return title
示例#7
0
def GetMeta(t, y):
    """Return the TMDb record for title *t* released in year *y*.

    The first TMDb search result whose release year equals *y* is returned
    with its ``genre_ids`` replaced by the IMDb genres of the first IMDb
    feature film of that year (``''`` when IMDb has no such hit).

    Args:
        t: Movie title to search for.
        y: Release year, compared as text against the year part of TMDb's
            ``release_date`` and against IMDb's ``year``.

    Returns:
        The TMDb result dict, or ``{}`` when TMDb has no hit for that year.
    """
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration instead.
    tmdb.API_KEY = 'b888b64c9155c26ade5659ea4dd60e64'
    search = tmdb.Search()
    search.movie(query=t)

    # Picking the match with next() also covers an empty result list, which
    # used to leave the ``state`` flag unbound.
    d = next(
        (s for s in search.results
         if (s.get('release_date') or '').split('-', 2)[0] == y),
        None,
    )
    if d is None:
        return {}

    imdb = Imdb()
    # Default first, so the key exists even when IMDb returns nothing.
    d['genre_ids'] = ''
    for i in imdb.search_for_title(t):
        if i['type'] == 'feature' and i['year'] == y:
            d['genre_ids'] = imdb.get_title_genres(i['imdb_id'])['genres']
            break
    return d
示例#8
0
class IMDbToAnki(QDialog):
    """Dialog that searches IMDb and turns a title's credits into notes.

    The dialog shows a query field, a result list, one check box per credit
    department and a "Create" button that hands the collected credit rows to
    ``ImportDialog``.
    """

    def __init__(self):
        """Build the widgets, wire the signals and run the dialog."""
        super().__init__()
        try:
            from imdbpie import Imdb
        except ImportError:
            showInfo('Please install the imdbpie module.')
            return
        self.imdb = Imdb()
        self.mainLayout = QGridLayout(self)
        self.queryInput = QLineEdit()
        self.searchResults = QListWidget()
        self.createButton = QPushButton("Create")
        self.createButton.setAutoDefault(False)
        self.sidebar = QWidget()
        self.sidebarLayout = QVBoxLayout(self.sidebar)
        # All valid tokens for imdbpie Person objects.
        self.departments = [
            'director', 'writer', 'cast', 'producer', 'cinematographer',
            'editor', 'casting_director', 'production_designer',
            'art_director', 'set_decorator', 'costume_designer',
            'make_up_department', 'assistant_director', 'art_department',
            'sound_department', 'visual_effects', 'music_department',
            'miscellaneous'
        ]
        self.depBoxes = [QCheckBox(token) for token in self.departments]
        for box in self.depBoxes:
            self.sidebarLayout.addWidget(box)
        self.queryInput.returnPressed.connect(self.search)
        self.createButton.clicked.connect(self.createNotes)
        self.mainLayout.addWidget(self.queryInput, 0, 0, 1, 1)
        self.mainLayout.addWidget(self.searchResults, 1, 0, 1, 1)
        self.mainLayout.addWidget(self.createButton, 2, 0, 1, 1)
        self.mainLayout.addWidget(self.sidebar, 0, 1, 3, 1)
        self.exec_()

    def search(self):
        """Fill the result list with the IMDb hits for the current query."""
        self.searchResults.clear()
        for result in self.imdb.search_for_title(self.queryInput.text()):
            item = QListWidgetItem(f'{result["title"]} ({result["year"]})',
                                   self.searchResults)
            # The IMDb id travels with the list item for later lookups.
            item.setData(Qt.UserRole, result['imdb_id'])
        self.searchResults.setCurrentRow(0)

    def credits(self):
        """Return the credits of the selected title for the ticked departments.

        Returns an empty list when no title is selected.  Departments the
        title has no credits for are skipped instead of raising KeyError.
        """
        current = self.searchResults.currentItem()
        if current is None:
            return []
        title = current.data(Qt.UserRole)
        departments = self.imdb.get_title_credits(title)['credits']
        enabledDeps = [box.text() for box in self.depBoxes if box.checkState()]
        nestedCredits = [departments.get(dep, []) for dep in enabledDeps]
        return list(chain.from_iterable(nestedCredits))

    def createNotes(self):
        """Build one ';'-joined row per credit and open the import dialog."""
        current = self.searchResults.currentItem()
        if current is None:
            # Nothing selected (e.g. "Create" pressed before any search).
            showInfo('Please search for and select a title first.')
            return
        titleYear = current.text()
        mw.progress.start(immediate=True)
        try:
            data = [
                ';'.join([titleYear, Person(p).fields]) for p in self.credits()
            ]
        finally:
            # Always close the progress dialog, even when a lookup fails.
            mw.progress.finish()
        ImportDialog(mw, ['Film Title', 'Name', 'Role', 'Image', 'Tags'], data)
示例#9
0
class imdb_api:
    """Interactive console helper around an imdbpie ``Imdb`` client."""

    def __init__(self, anonymize=False):
        self.imdb = Imdb(anonymize=anonymize)

    def search(self, title, imdb_id=""):
        """Look up a title, asking the user to pick among search results.

        Args:
            title: Search text; ignored when *imdb_id* is given.
            imdb_id: IMDb id to look up directly, skipping the search.

        Returns:
            ``[title]``, or ``[title, episodes]`` when the title's type is
            ``"tv_series"``.

        Raises:
            LookupError: If the search returns no results.
        """
        if not imdb_id:
            print("Searching")
            results = self.imdb.search_for_title(title)
            if not results:
                raise LookupError("No IMDb results for {!r}".format(title))
            chosen = self.search_select(results)
            title_id = results[chosen]["imdb_id"]
        else:
            title_id = imdb_id
        print("Looking up series")
        title = self.imdb.get_title_by_id(title_id)
        if title.type == "tv_series":
            print("Detected TV series, downloading episode list")
            episodes = self.imdb.get_episodes(title_id)
            print(episodes)
            return [title, episodes]
        else:
            return [title]

    def search_select(self, results):
        """Return the index into *results* of the entry the user picks.

        Results are listed ten at a time; after each page the user may enter
        a number or ``0`` to see more.  A single result is selected without
        prompting (index ``0``); an empty list yields ``None``.
        """
        def get_input(allow_0=True):
            # Returns 0 only when "0" is typed and allowed ("see more");
            # otherwise keeps prompting until a non-zero integer is entered.
            print("Which show do you want? type 0 to see more")
            while True:
                inp = input("> ")
                if inp == "0" and allow_0:
                    return 0
                try:
                    out = int(inp)
                except ValueError:
                    print("Not a number")
                    continue
                if out != 0:
                    return out

        if not results:
            return None
        if len(results) == 1:
            # Nothing to choose from; this case used to fall through and
            # return None, which broke the indexing in search().
            return 0

        chosen = 0
        for index, show in enumerate(results):
            print("[{0}] ({3})\t{1}\t{2}".format(str(index + 1),
                                                 show["title"],
                                                 show["year"],
                                                 show["imdb_id"]))
            if ((index + 1) % 10) == 0:
                chosen = get_input()
                if chosen != 0:
                    break
        if chosen == 0:
            print("No more")
            chosen = get_input(allow_0=False)
        # Reject numbers that do not correspond to a listed result.
        while not 1 <= chosen <= len(results):
            print("Out of range")
            chosen = get_input(allow_0=False)
        return chosen - 1


#imdb = imdb_api()
#x = imdb.search("Person of Interest")
#print(x)
示例#10
0
def review_extracter(movie):
    """Pickle the texts of the IMDb user reviews returned for *movie*.

    The first search hit for *movie* is used; the ``reviewText`` of every
    review returned by ``get_title_user_reviews`` is collected into a list
    that is pickled to ``<movie>_imdb.pickle``.

    Raises:
        IndexError: If the search returns no results.
    """
    # One search and one review request; both used to be issued twice.
    imdb = Imdb()
    first_hit = imdb.search_for_title(movie)[0]
    reviews = imdb.get_title_user_reviews(first_hit["imdb_id"])

    review_list = [review["reviewText"] for review in reviews["reviews"]]

    with open(f"{movie}_imdb.pickle", "wb") as f:
        pickle.dump(review_list, f)
示例#11
0
def get_title_id(movie, year):
    """Return the IMDb id of the first search hit for *movie* from *year*.

    Prints a message and returns ``None`` when no hit has that year.
    """
    client = Imdb(anonymize=True)
    for candidate in client.search_for_title(movie):
        if candidate['year'] == year:
            return candidate['imdb_id']
    print('No match for movie {}'.format(movie))
    return None
示例#12
0
def getMovieInfo(title):
    """Return labelled info lines for the first IMDb hit for *title*.

    Returns:
        A list of five strings: title, rating, runtime (the title's runtime
        divided by 60), release date and certification.  Fields IMDb does
        not provide are rendered as ``None`` instead of raising TypeError.

    Raises:
        IndexError: If the search returns no results.
    """
    imdb = Imdb(anonymize=True)
    movie_id = imdb.search_for_title(title)[0]['imdb_id']
    movie = imdb.get_title_by_id(movie_id)
    runtime = None if movie.runtime is None else int(movie.runtime) / 60
    # str.format accepts None, unlike the former '+' concatenation.
    movieInfo = [
        'Title: {}'.format(movie.title),
        'Rating: {}'.format(movie.rating),
        'Runtime: {}'.format(runtime),
        'Release Date: {}'.format(movie.release_date),
        'Certification: {}'.format(movie.certification),
    ]
    return movieInfo
示例#13
0
def main():
    """Prompt for a movie, list the search hits, then print a title's reviews.

    The user first enters a movie name, whose hits are shown through
    ``help_print_movie``, and then an IMDb id whose user reviews are shown
    through ``help_print_review``.
    """
    client = Imdb()
    movie = str(input('Movie Name: '))
    # The words of the name are joined with '+' before searching.
    query = '+'.join(movie.split())
    help_print_movie(client.search_for_title(query))
    imdb_id = str(input('IMBD ID: '))
    help_print_review(client.get_title_user_reviews(imdb_id)['reviews'])
示例#14
0
def identify_movies(movies):
    """Return the IMDb title objects matching the given year/title pairs.

    *movies* maps a year to an iterable of titles.  A search hit counts as a
    match when its year equals the key and its lower-cased title equals the
    searched title.
    """
    client = Imdb()

    def matching_ids():
        # Yield the id of every hit agreeing in both year and title.
        for wanted_year, titles in movies.items():
            for wanted_title in titles:
                for hit in client.search_for_title(wanted_title):
                    if wanted_year != hit.get('year'):
                        continue
                    if wanted_title == hit.get('title').lower():
                        yield hit.get('imdb_id')

    # Collect every id first, then fetch the full records.
    found_ids = list(matching_ids())
    return [client.get_title_by_id(movie_id) for movie_id in found_ids]
示例#15
0
def SearchMovies(value):
    """Search IMDb for *value* and return a ``Movie`` per typed hit.

    Every hit with a non-None type gets an ``APIGetTitle`` task signature;
    the signatures run as one group and each task result initialises a
    ``Movie`` through ``SetAfterInit``.
    """
    client = Imdb()
    hits = client.search_for_title(value)
    signatures = [
        APIGetTitle.s(hit['imdb_id']) for hit in hits if hit['type'] is not None
    ]
    pending = group(signatures).apply_async()
    # Poll once per second until the whole group has finished.
    while not pending.ready():
        time.sleep(1)
    movies_result = []
    for payload in pending.get():
        mov = Movie()
        mov.SetAfterInit(payload)
        movies_result.append(mov)
    return movies_result
示例#16
0
    def query_movie(self, query, data):
        """Return newline-terminated titles for an IMDb query.

        Args:
            query: ``"popularshows"`` or ``"popularmovies"`` (up to ten
                titles each) or ``"search"`` (up to five titles).
            data: Search text, used only for ``"search"``.

        Returns:
            The titles, each followed by a newline; an empty string for any
            other *query*.
        """
        if query not in ("popularshows", "popularmovies", "search"):
            return ""

        imdb = Imdb()
        # Slicing instead of fixed-range indexing: shorter result lists no
        # longer raise IndexError.
        if query == "popularshows":
            entries = imdb.get_popular_shows()['ranks'][:10]
        elif query == "popularmovies":
            entries = imdb.get_popular_movies()['ranks'][:10]
        else:
            entries = imdb.search_for_title(data)[:5]
        return "".join(entry['title'] + "\n" for entry in entries)
示例#17
0
def seasonBuilder(title):
    """Collect general info and the episodes per season of a show.

    The first IMDb search hit for *title* is used.

    Returns:
        ``(show_title, year, image_url, description, seasons)`` where
        ``seasons`` maps a season number to a dict mapping an episode number
        to ``[title, release_date, imdb_id]``.  ``None`` is returned (after
        printing a message) when the search has no results.
    """
    # A single client; a plain Imdb() used to be built and discarded first.
    imdb = Imdb(anonymize=True)

    title_json = imdb.search_for_title(title)
    if not title_json:
        print('No Results Found')
        return None

    # The IMDb id of the first hit drives all further lookups.
    title_id = title_json[0]['imdb_id']
    result = imdb.get_title_by_id(title_id)

    show_title = result.title
    year = result.year
    image_url = result.cover_url
    description = result.plot_outline

    # Group episodes by their own season number.  The former running counter
    # stored a season only once the next one began, so the last season was
    # always missing, and it broke when season numbers skipped.
    seasons = {}
    for e in imdb.get_episodes(title_id):
        seasons.setdefault(e.season, {})[e.episode] = [
            e.title, e.release_date, e.imdb_id
        ]

    return show_title, year, image_url, description, seasons
class CommonMetadataIMDB(object):
    """
    Class for interfacing with imdb
    """

    def __init__(self, cache=True, cache_dir=None):
        """
        Open the connection to imdb.

        cache: enable the client's cache when truthy.
        cache_dir: optional cache directory, used only when caching is on.
        """
        # Truthiness instead of ``is not None``: passing cache=False used to
        # enable caching anyway.
        if cache:
            if cache_dir is not None:
                self.imdb = Imdb(cache=True, cache_dir=cache_dir)
            else:
                self.imdb = Imdb(cache=True)
        else:
            self.imdb = Imdb()

    def com_imdb_title_search(self, media_title):
        """
        # fetch info from title
        """
        return self.imdb.search_for_title(media_title)

    def com_imdb_id_search(self, media_id):
        """
        # fetch info by ttid
        """
        return self.imdb.get_title_by_id(media_id)

    def com_imdb_person_by_id(self, person_id):
        """
        # fetch person info by id
        """
        return self.imdb.get_person_by_id(person_id)

    def com_imdb_person_images_by_id(self, person_id):
        """
        # fetch person images by id
        """
        return self.imdb.get_person_images(person_id)

    def com_imdb_title_review_by_id(self, media_id):
        """
        # fetch the title review
        """
        return self.imdb.get_title_reviews(media_id)
示例#19
0
 def _query_api(self, title, year=None):
     """Find the imdb id and year of the movie matching *title*.

     Search hits are compared by sanitized title (and by year when one is
     given).  When no hit matches, the alternate titles of the first hit are
     tried as well.

     Returns:
         ``(imdb_id, year)`` with the year as an int (``None`` when the hit
         carries no year), or ``(None, year)`` when nothing matches.
     """
     name = title
     if year:
         name += ' (' + text_type(year) + ')'
     log.info('Querying imdb api for %s', name)
     api = Imdb()
     imdb_movies = api.search_for_title(title)
     wanted_title = self.sanitize_imdb_title(title)

     def year_matches(movie):
         # Without a requested year, any release year is acceptable.
         return not year or movie['year'] == text_type(year)

     def as_result(movie):
         # Every successful branch returns the year as an int; the alternate
         # title branch used to return it as text.
         found_year = movie['year']
         return movie['imdb_id'], (
             int(found_year) if found_year is not None else None)

     # Find the first movie that matches the title (and year if present)
     for movie in imdb_movies:
         if (self.sanitize_imdb_title(movie['title']) == wanted_title
                 and year_matches(movie)):
             return as_result(movie)

     # If no match is found, try the alternative titles of the first (most
     # relevant) result.  Its year is checked first because it is the same
     # for every alternate title.
     if len(imdb_movies) > 0:
         best_match = imdb_movies[0]
         if year_matches(best_match):
             versions = api.get_title_versions(best_match['imdb_id'])
             if versions and 'alternateTitles' in versions:
                 for alternate_title in versions['alternateTitles']:
                     if self.sanitize_imdb_title(
                             alternate_title['title']) == wanted_title:
                         return as_result(best_match)
     return None, year
示例#20
0
文件: views.py 项目: zdalih/mea
    def post(self, request, *arg, **kwargs):
        """Answer with a JSON list of the title hits for the posted query.

        Each entry carries ``imdbId``, ``title`` and ``year`` (as text); hits
        whose id does not start with ``tt`` are left out.
        """
        client = Imdb()
        hits = client.search_for_title(request.data['query'])

        payload = [
            {
                'imdbId': hit['imdb_id'],
                'title': hit['title'],
                'year': str(hit['year']),
            }
            for hit in hits
            if hit['imdb_id'][0:2] == 'tt'
        ]

        return HttpResponse(json.dumps(payload))
示例#21
0
    def query_movie(self, message, bot_handler: Any):
        """Answer a movie command found in ``message['content']``.

        The second word of the content selects the command:
        ``popularshows`` / ``popularmovies`` (up to ten titles each) or
        ``search <text>`` (up to five titles).  Anything else, including a
        missing command or search text, yields the help hint.

        Returns:
            The titles, one per line, or the help hint.
        """
        tokens = message['content'].split()
        query = tokens[1] if len(tokens) > 1 else ""
        # Everything after the command is the search text; taking only the
        # first word used to truncate multi-word titles.
        data = " ".join(tokens[2:])

        if query in ("popularshows", "popularmovies") or (query == "search" and data):
            imdb = Imdb()
            # Slicing instead of fixed-range indexing: shorter result lists
            # no longer raise IndexError.
            if query == "popularshows":
                entries = imdb.get_popular_shows()['ranks'][:10]
            elif query == "popularmovies":
                entries = imdb.get_popular_movies()['ranks'][:10]
            else:
                entries = imdb.search_for_title(data)[:5]
            return "".join(entry['title'] + "\n" for entry in entries)

        return "No movie reponse found. \n Use `@Savior help` for commands"
示例#22
0
def average_reviewscore(title):
    """
    Accepts a movie title from user as a string.
    Calls the imdbpie API and iterates through each user review left for the specified title.
    Uses sentiment analysis and prints the average compound score of all reviews left for the particular title.
    Prints a notice instead when the title has no reviews.
    """
    imdb = Imdb()
    title_id = imdb.search_for_title(title)[0]['imdb_id']
    reviews = imdb.get_title_user_reviews(title_id)

    # One analyzer for all reviews instead of a new one per review.
    analyzer = SentimentIntensityAnalyzer()
    compound_scores = [
        analyzer.polarity_scores(review['reviewText'])['compound']
        for review in reviews.get('reviews') or []
    ]

    if not compound_scores:
        # Averaging an empty list used to raise ZeroDivisionError.
        print('No reviews found for %s' % title)
        return
    print(sum(compound_scores) / len(compound_scores))
示例#23
0
def imdb_search():
    """Erase the current torrent when its IMDb data fails the filters.

    The title and year guessed from ``torrent_name`` are searched on IMDb;
    ratings and countries of origin of the first hit are fetched in two
    threads.  The torrent is erased (and the process exits) when the rating
    or vote count is below ``minimum_rating`` / ``minimum_votes``, or when
    ``skip_foreign`` is set and ``'US'`` is not among the origins.

    The check is best effort: on any lookup failure the function returns
    without touching the torrent.
    """
    ratings = {}
    country = []
    failures = []

    try:
        from threading import Thread
        from guessit import guessit
        from imdbpie import Imdb

        def guarded(fetch):
            # Record a worker's exception; otherwise it would only be printed
            # by the thread and leave partial data to be judged below.
            def runner():
                try:
                    fetch()
                except Exception as exc:
                    failures.append(exc)
            return runner

        def imdb_ratings():
            ratings.update(imdb.get_title_ratings(movie_imdb))

        def movie_country():
            country.extend(imdb.get_title_versions(movie_imdb)['origins'])

        imdb = Imdb()
        torrent_info = guessit(torrent_name)
        movie_title = torrent_info['title'] + ' ' + str(torrent_info['year'])
        movie_imdb = imdb.search_for_title(movie_title)[0]['imdb_id']

        t1 = Thread(target=guarded(movie_country))
        t2 = Thread(target=guarded(imdb_ratings))
        t1.start()
        t2.start()
        t1.join()
        t2.join()
    except Exception:
        # Narrower than the former bare ``except:``, which also swallowed
        # SystemExit and KeyboardInterrupt.
        return

    rating = ratings.get('rating')
    votes = ratings.get('ratingCount')
    if failures or rating is None or votes is None:
        # Incomplete data: keep the torrent rather than judge it.
        return

    if rating < minimum_rating or votes < minimum_votes:
        xmlrpc('d.erase', (torrent_hash, ))
        sys.exit()

    if skip_foreign and 'US' not in country:
        xmlrpc('d.erase', (torrent_hash, ))
        sys.exit()
示例#24
0
def fetch_movie_info(name):
    """Look up *name* on IMDb and fetch its details from the OMDb API.

    Returns:
        ``(True, info)`` where ``info`` is ``[imdb id, title, rating (float),
        genre, year (int), actors, director, runtime]``, or ``(False, name)``
        when OMDb reports failure or anything goes wrong on the way.
    """
    # API key
    apikey = "78d08b59"

    try:
        # The first IMDb search hit supplies the id OMDb is queried with.
        movie_id = Imdb().search_for_title(name)[0]["imdb_id"]

        url = "http://www.omdbapi.com/?i=" + movie_id + "&apikey=" + apikey
        payload = json.loads(urllib.request.urlopen(url).read())

        if payload['Response'] != 'True':
            return (False, name)

        info_list = [
            movie_id,
            payload["Title"],
            float(payload["imdbRating"]),
            payload["Genre"],
            int(payload["Year"]),
            payload["Actors"],
            payload["Director"],
            payload["Runtime"],
        ]
        return (True, info_list)

    except Exception:
        return (False, name)
示例#25
0
    def _get_result(self, query, select_result):
        """Return the IMDb search result for *query* chosen by the caller.

        Args:
            query: Title text to search for.
            select_result: Callable that receives a list of
                ``(imdb_id, 'title (year)')`` tuples and returns the chosen
                ``imdb_id``; only called when there are several results.

        Returns:
            The single result, or the result whose id was selected.

        Raises:
            MediaNotFoundException: If there are no results or the selected
                identifier matches none of them.
        """
        # NOTE(review): the message names thetvdb although IMDb is queried;
        # it is left unchanged in case callers rely on the text.
        not_found_exception = self.MediaNotFoundException(
            'Could not find the anime on thetvdb.')

        imdb = Imdb(cache=True)
        results = imdb.search_for_title(query)

        if len(results) == 0:
            raise not_found_exception
        if len(results) == 1:
            return results[0]

        formatted_results = [(x['imdb_id'],
                              '{} ({})'.format(x['title'], x['year']))
                             for x in results]

        identifier = select_result(formatted_results)

        for item in results:
            if item['imdb_id'] == identifier:
                return item
        # An unknown identifier used to return the last result silently.
        raise not_found_exception
示例#26
0
class ImdbCommand(Command):
    """Bot command that replies with details of the first IMDB search hit."""

    name = 'imdb'
    aliases = ['movie']
    description = 'Searches IMDB for movie titles.'

    def __init__(self, bot, config):
        super().__init__(bot, config)
        self._imdb = Imdb(cache=True, exclude_episodes=True)

    def run(self, message, args):
        """Search IMDB for the joined *args* and reply with an HTML summary.

        Replies with a hint when no search terms were given or nothing was
        found.  Fields the title does not provide are tolerated: a missing
        runtime is shown as ``N/A`` and a missing plot is left out.
        """
        if not args:
            self.reply(message, 'Please supply some search terms!')
            return

        self.bot.telegram.send_chat_action(message.chat.id, 'typing')
        results = self._imdb.search_for_title(' '.join(args))
        if not results:
            self.reply(message, 'No results found!')
            return

        result = self._imdb.get_title_by_id(results[0]['imdb_id'])

        # ``or []`` guards against attributes that are None.
        genres = result.genres or []
        cast = result.cast_summary or []
        directors = result.directors_summary or []
        if result.runtime is None:
            runtime_text = 'N/A'
        else:
            # Whole number of 60-unit blocks of the runtime value.
            runtime, _ = divmod(result.runtime, 60)
            runtime_text = '{0} minutes'.format(runtime)

        reply = '<b>URL:</b> http://www.imdb.com/title/{0}\n'.format(telegram_escape(result.imdb_id))
        reply += '<b>Title:</b> {0}\n'.format(telegram_escape(result.title))
        reply += '<b>Year:</b> {0}\n'.format(result.year)
        reply += '<b>Genre:</b> {0}\n'.format(telegram_escape(', '.join(genres[:3])))
        reply += '<b>Rating:</b> {0}\n'.format(result.rating)
        reply += '<b>Runtime:</b> {0}\n'.format(runtime_text)
        reply += '<b>Certification:</b> {0}\n'.format(result.certification)
        reply += '<b>Cast:</b> {0}\n'.format(
            telegram_escape(', '.join([person.name for person in cast[:5]])))
        reply += '<b>Director(s):</b> {0}\n\n'.format(
            telegram_escape(', '.join([person.name for person in directors[:5]])))
        if result.plots:
            reply += telegram_escape(result.plots[0])

        self.reply(message, reply, parse_mode='HTML')
from imdbpie import Imdb
# A single client; a plain Imdb() used to be created and replaced at once.
imdb = Imdb(anonymize=True)
var1 = imdb.top_250()
var2 = imdb.search_for_title("The Dark Knight")
print(var2)
# The context manager closes the file even if the write fails.
with open("/test/src/test/output.txt", "w") as text_file:
    text_file.write(" %s" % var1)
示例#28
0
from imdbpie import Imdb

# Collect and print every feature-film hit for the query.
imdb = Imdb()
movie_results = []
results = imdb.search_for_title("Hell Boy II")
for result in results:
    if result['type'] != "feature":
        continue
    movie_results.append(result)
    print(result)
示例#29
0
soup = BeautifulSoup(page.text, 'html.parser')

# The text of the link inside each matching element is taken as a movie name.
movie_diary = soup.find_all(class_='headline-3 prettify')
movies = [x.find('a').text for x in movie_diary]

print('Finished getting movie list...')

imdb = Imdb()

poster = {}

# Images are stored in a folder named after the user.
if os.path.isdir(USER):
    print('Folder for ' + USER + ' exists!')
else:
    print('Folder for ' + USER + ' created!')
    os.mkdir(USER)

for movie in movies:
    try:
        if os.path.isfile(USER + '/' + movie + '.jpg'):
            print('Using local image for: ' + movie)
        else:
            print('Downloading image for: ' + movie)
            # The image of the first search hit is saved under the movie name.
            first_hit = imdb.search_for_title(movie)[0]
            image_url = imdb.get_title(first_hit['imdb_id'])['base']['image']['url']
            urllib.request.urlretrieve(image_url, USER + '/' + movie + '.jpg')
    except Exception as e:
        print('ERROR: ' + movie + ' could not be processed!')
        print(e)
示例#30
0
from imdbpie import Imdb
import random

imdb = Imdb()

# For each film: the first search hit for its name, then the user reviews
# fetched through a hard-coded title id.
parasite = imdb.search_for_title("Parasite")[0]
reviews = imdb.get_title_user_reviews("tt6751668")

joker = imdb.search_for_title("Joker")[0]
review2 = imdb.get_title_user_reviews("tt7286456")

unplanned = imdb.search_for_title("Unplanned")[0]
review3 = imdb.get_title_user_reviews("tt9024106")

thegodfather = imdb.search_for_title("The Godfather")[0]
reviews3 = imdb.get_title_user_reviews("tt0068646")

disastermovie = imdb.search_for_title("Disaster Movie")[0]
reviews2 = imdb.get_title_user_reviews("tt1213644")

# print(reviews)
# import pprint
# pprint.pprint(reviews)
# pprint.pprint(review2)
# pprint.pprint(review3)
# pprint.pprint(reviews3)
# pprint.pprint(reviews2)

# print(reviews['reviews'][0:]['author']['displayName'])
# print(reviews['reviews'][0]['reviewText'])

示例#31
0
class ImdbClient:
    """Lookups for TV shows across imdbpie, IMDbPY, TheTVDB and MongoDB."""

    def __init__(self):
        self.imdbpy = IMDb()
        # One client carrying both options; a client built with
        # exclude_episodes=False used to be replaced immediately by a second
        # one built with anonymize=True (to proxy requests).
        self.imdb = Imdb(anonymize=True, exclude_episodes=False)
        self.db = api.TVDB('B43FF87DE395DF56')

    def get_tweets_from_mongo(self, show, limit):
        """Return the text of up to *limit* stored tweets for *show*."""
        # Connect to mongo
        client = MongoClient()

        # access movie stream db
        movies = client['movieratings_stream']

        # collection of tweets
        tweets = movies['tweets']

        tweet_text = []
        # iterate through the cursor of tweets whose show_title is 'show'
        for tweet in tweets.find({'show_title': show}):  # .sort('created_at', pymongo.DESCENDING):
            if len(tweet_text) >= limit:
                break
            tweet_text.append(tweet.get("tweet_text"))
        return tweet_text

    def get_show_id(self, show_title):
        """Return the imdb id of the first hit closely matching *show_title*.

        A hit matches when the fuzzy ratio of the lower-cased titles is at
        least 90.  Returns ``None`` when nothing matches.
        """
        query = show_title.lower()
        for candidate in self.imdb.search_for_title(show_title):
            if candidate is None:
                continue
            if fuzz.ratio(candidate[u'title'].lower(), query) >= 90:
                show_id = candidate[u'imdb_id']
                if show_id is not None:
                    return show_id
        return None

    # TODO: get rid of usage of this
    def searchShow(self, tvshow):
        """Print and return the reviews of the show matching *tvshow*.

        Returns an empty list when no matching show id is found.
        """
        title_id = self.get_show_id(tvshow)
        print(title_id)
        reviews = []
        print(tvshow)

        if title_id is not None and title_id != '':
            # sys.maxsize exists on Python 2 and 3 (sys.maxint is 2-only).
            reviews = self.imdb.get_title_reviews(title_id, max_results=sys.maxsize)
            print(reviews)
        else:
            print("Invalid show id")

        return reviews

    def fetch_reviews(self, episode_id):
        """Return all reviews for the title with id *episode_id*."""
        return self.imdb.get_title_reviews(episode_id, max_results=sys.maxsize)

    def getCurrentImdbRating(self, tvshow):
        """Return the rating of the show matching *tvshow* as a float."""
        tvshowid = self.get_show_id(tvshow)
        title = self.imdb.get_title_by_id(tvshowid)
        return float(title.rating)

    # dont use this, use example from
    # http://imdbpy.sourceforge.net/docs/README.series.txt
    def get_all_episode_names(self, tvshow):
        """Return the non-empty episode names of the first TVDB hit."""
        result = self.db.search(tvshow, 'en')
        show = result[0]
        res = []
        # NOTE(review): range(1, len(show)) presumably relies on the show
        # also holding a season 0 -- confirm against the TVDB client.
        for x in range(1, len(show)):
            season = show[x]
            for y in range(1, len(season) + 1):
                if season[y].EpisodeName is not None and season[y].EpisodeName != '':
                    res.append(season[y].EpisodeName)
        return res

    def get_show(self, show_id):
        """Fetch the IMDbPY show for *show_id* and load its episodes."""
        show = self.imdbpy.get_movie(show_id.replace('t', ''))
        self.imdbpy.update(show, 'episodes')
        print("show_show(" + show_id + "): " + str(show))

        return show

    # episode names for a specific season of tvshow
    def get_specific_episode_names(self, tvshow, season):
        """Print and return the episode names of season *season*."""
        result = self.db.search(tvshow, 'en')
        show = result[0]
        res = []
        # Use the requested season; season 1 used to be hard-coded here,
        # ignoring the argument.
        season_data = show[season]
        for x in range(1, len(season_data) + 1):
            if season_data[x].EpisodeName is not None:
                print(season_data[x].EpisodeName)
                res.append(season_data[x].EpisodeName)
        return res

    def get_all_episode_reviews(self, episodelist, tvshow):
        """Return the searchShow() reviews for every episode name given."""
        reviews = []
        for episode in episodelist:
            # Each episode is searched as "<episode name> <show name>".
            curEpisode = episode + " " + tvshow
            reviews.append(self.searchShow(curEpisode))

        print("Episodes:\n" + str(reviews))
        return reviews
示例#32
0
from imdbpie import Imdb
# Build one client only: the plain Imdb() instance that used to be created
# first was replaced before it was ever used.
imdb = Imdb(anonymize=True)  # to proxy requests

# Search by title and dump the raw result list.
print(imdb.search_for_title("The Dark Knight"))
# https://github.com/richardasaurus/imdb-pie

from imdbpie import Imdb

# Build one client only: the plain Imdb() instance that used to be created
# first was replaced before it was ever used.
imdb = Imdb(anonymize=True)

# Title search, person search and the episode list of one show id, each
# followed by a blank line.
print(imdb.search_for_title("The Dark Knight"))
print()
print(imdb.search_for_person("Christian Bale"))
print()
print(imdb.get_episodes('tt0096697'))

top250 = imdb.top_250()

# Print every chart entry followed by a blank line.
for entry in top250:
    print(entry)
    print()

title = imdb.get_title_by_id("tt1210166")
for person in title.credits:
    # check if they are a writer
    if person.token == 'writers':
        print(person.name + ' is a writer')
    else:
        print(person.name + ' is not a writer')
from imdbpie import Imdb
# Only the cache-enabled client was ever used; the two instances created
# before it were overwritten immediately.
imdb = Imdb(cache=True)

# Read the titles to look up: one per line, surrounding whitespace removed.
with open('title.csv', 'r') as f1:
    title = [line.strip() for line in f1]

# Write every search hit of every title on its own line. The context manager
# closes details.csv, so buffered output is flushed even if a lookup fails.
with open('details.csv', 'w') as f2:
    for item in title:
        for temp_item in imdb.search_for_title(item):
            f2.write(str(temp_item))
            f2.write('\n')
示例#35
0
import psycopg2
from imdbpie import Imdb
import random
# Build one client only: the plain Imdb() instance that used to be created
# first was replaced before it was ever used.
imdb = Imdb(anonymize=True)
variable = imdb.search_for_title("The Dark Knight")[0]
# conn = psycopg2.connect()
# cur = conn.cursor()

# Show a few attributes of one fixed title.
title = imdb.get_title_by_id("tt0468569")
print(title.title)
print(title.rating)
print(title.runtime)

# Print 15 randomly chosen entries of the top-250 chart (repeats possible).
listOfPopularMovies = imdb.top_250()
for _ in range(15):
    # Sample over the real length of the list so index 0 can be chosen and a
    # shorter-than-expected chart cannot cause an IndexError.
    temp = random.randrange(len(listOfPopularMovies))
    t = listOfPopularMovies[temp]
    tid = t["tconst"]
    print(tid)
    # NOTE(review): the rank assumes top_250() is ordered best-first, so the
    # entry at index 0 is rank 1 -- confirm for the imdbpie version in use.
    print(t["title"] + " is the " + str(temp + 1) + "th rated movie")
    print("It's score is: " + str(t["rating"]))
示例#36
0
def getMovieInfo(title):
    """Look up ``title`` and describe the first search hit.

    Args:
        title: Text handed to ``Imdb.search_for_title``.

    Returns:
        list: Five strings labelled Title, Rating, Runtime, Release Date and
        Certification. Runtime is the reported value floor-divided by 60.

    Raises:
        IndexError: If the search returns no results.
    """
    imdb = Imdb(anonymize=True)
    movie_id = imdb.search_for_title(title)[0]['imdb_id']
    movie = imdb.get_title_by_id(movie_id)
    # ``//`` keeps the whole-number result that ``/`` produced on Python 2;
    # plain ``/`` would yield a float such as 2.5333 on Python 3.
    runtime = int(movie.runtime) // 60
    movieInfo = [
        'Title: ' + movie.title,
        'Rating: ' + str(movie.rating),
        'Runtime: ' + str(runtime),
        'Release Date: ' + movie.release_date,
        'Certification: ' + movie.certification,
    ]
    return movieInfo
示例#37
0
from imdbpie import Imdb
import os
import re
import my_utils

# Location of the movie collection and of the on-disk imdbpie cache.
dir_movies = '/media/titan/videos/movies'
dir_cache = '/media/titan/videos/imdbpie-cache'

imdb = Imdb(anonymize=True, cache=dir_cache)
b_first_file_found = False

# Walk the collection and run an IMDb search for the first file that
# my_utils does not ask us to skip, then stop walking.
for dirName, subdirList, fileList in os.walk(dir_movies):
    for fname in fileList:
        if not my_utils.can_skip_file(dirName, fname):
            print(imdb.search_for_title(my_utils.form_search_string(fname)))
            b_first_file_found = True
            break

    # The inner break only leaves the file loop; leave the walk as well.
    if b_first_file_found:
        break

示例#38
0
class Producer(object):
    """
    Class to do most of the processing, and produce filesystem operations to be
    performed by the Consumer. Each operation is passed as a closure.

    Messages are emitted with the single-argument ``print(...)`` form, which
    behaves identically as a Python 2 print statement and as the Python 3
    print function.
    """
    # pylint: disable=too-many-instance-attributes
    # Eight is reasonable in this case.
    def __init__(self, args):
        """
        Resolve the files to process and the output directory (asking through
        Tkinter dialogs when either is missing from args), build the list of
        title clean-up steps and, unless this is a dry or synchronous run,
        start the Consumer thread fed through a shared queue.
        """
        self.args = args
        self.imdb = Imdb()

        # only set up Tkinter if we're going to use it
        if not self.args.files or not self.args.output_dir:
            try:
                from Tkinter import Tk
                from tkFileDialog import askopenfilename, askdirectory
            except ImportError:
                print("Please install the Tkinter library "
                      "for graphical file and directory choosers.")
                sys.exit(1)
            root = Tk()
            root.withdraw() # keep the root window from appearing

        # get movie files to process
        if not self.args.files:
            self.files = askopenfilename(
                            multiple=True,
                            title="What movies do you want to rename?")
            if not self.files:
                sys.exit(0)
        else:
            if os.name == "nt":
                # use glob to expand wildcards, since Windows sucks and doesn't
                # do it for us
                glob_expansions_list = [iglob(glob_str)
                                    for glob_str in self.args.files]
                self.files = [filename
                        for file_list in glob_expansions_list
                        for filename in file_list]
            else:
                self.files = self.args.files

        # get output directory
        if not self.args.output_dir:
            self.output_dir = askdirectory(title="Choose the output directory")
            if not self.output_dir:
                sys.exit(0)
        else:
            self.output_dir = self.args.output_dir

        # set up guess manipulations; they are applied in list order by
        # get_best_guess
        self.guess_manipulations = []
        if self.args.remove:
            # remove user-specified patterns
            self.guess_manipulations.extend(
                [build_regex_manipulation(pat)
                    for pat in self.args.remove])
        # remove some common junk
        self.guess_manipulations.extend(
            [build_regex_manipulation(pat, sub)
                for (pat, sub) in
                [("[^A-Za-z0-9']+", " "), # replace non-alphanumeric with space
                 ("(\\d){3,4}[ip]", ""), # remove 1080p, 720i, etc.
                 ("(dvd)|(bluray)", ""), # remove DVD, bluray
                 ("yify", ""),
                 ("(x|h)264", ""),
                 ("brrip", ""),
                 ("(19\\d{2})|(2\\d{3})", ""), # remove year if present
                 ("\\s+", " "), # collapse whitespace
                ]
            ])
        # trim leading and trailing whitespace
        self.guess_manipulations.append(string.strip)

        self.b_is_working = True
        # set up shared fs_op queue
        self.fs_op_queue = None
        if not self.args.dry_run and not self.args.synchronous:
            # to be shared by producer, consumer
            self.fs_op_queue = Queue.Queue(maxsize=0)

        # set up consumer; it only exists when operations are queued
        self.consumer = None
        if self.fs_op_queue is not None:
            self.consumer = Consumer(self, args, self.fs_op_queue)
            self.consumer.daemon = True
            self.consumer.start()

    def get_imdb_title_from_guess(self, title_guess):
        """
        Use the IMDB API to get the year from a title. I'm assuming that the
        first result is the best match for the search, but I don't know if this
        is documented anywhere.

        Returns the string "<title> (<year>)" for the first result, or None
        when the search fails or returns nothing.
        """
        title_format = "%s (%s)"
        try:
            results_list = self.imdb.search_for_title(title_guess)
        except ValueError:
            print("Error receiving data from IMDB")
            return None
        except Exception as ex:
            print("Caught exception from imdb-pie: {}".format(ex))
            return None

        if results_list:
            result = results_list[0]
            imdb_title = title_format % (result["title"], result["year"])
            self.log_guess('Using "%s" as best result for guess "%s"' %
                           (imdb_title, title_guess))
            return imdb_title
        else:
            print('ERROR: could not get year for "%s"' % title_guess)
            return None

    def get_best_guess(self, filename):
        """
        Given a filename and args, compute the best guess at the title by
        running it through every entry of self.guess_manipulations in order.
        """
        guess = filename

        for manipulate in self.guess_manipulations:
            guess = manipulate(guess)

        self.log_guess('Computed best guess of "%s" for "%s"' %
                       (guess, filename))
        return guess

    def log_guess(self, log_str):
        """
        Log the guess string, if necessary (verbose or guess-only mode).
        """
        if self.args.verbose or self.args.guess_only:
            print(log_str)

    def create_container(self, container_dir):
        """
        Produce the file operation to create a particular container directory,
        if necessary. Does not actually perform the file operation.

        Returns False only when the path exists as a non-directory and
        args.force is not set; True otherwise.
        """
        if os.path.exists(container_dir):
            if os.path.isdir(container_dir):
                # path exists, and is directory; this is fine
                return True

            # not a directory:
            self.log_fs_problem('"%s" already exists but is not a directory' %
                                container_dir)

            if not self.args.force:
                return False
            else:
                # operation should be forced
                self.log_fs_operation('Remove: "%s"' % container_dir)
                def rm_container():
                    """ Remove the container. """
                    os.remove(container_dir)
                self.do_fs_operation(rm_container)

        self.log_fs_operation('Create: container "%s"' % container_dir)
        def mkdir():
            """ Create the container. If args.output_dir doesn't exist, this
            will create it."""
            os.makedirs(container_dir)
        self.do_fs_operation(mkdir)
        return True

    def transfer_file(self, src_file, dest_file):
        """
        Produce the fs_ops to move/copy src_file to dest_file.

        Returns False when dest_file already exists and args.force is not
        set; True otherwise.
        """
        if os.path.exists(dest_file):
            self.log_fs_problem('File already exists: "%s"' % dest_file)

            if not self.args.force:
                return False
            else:
                # operation should be forced
                self.log_fs_operation('Remove: "%s"' % dest_file)
                def rm_file():
                    """ Remove dest_file."""
                    if os.path.isdir(dest_file):
                        shutil.rmtree(dest_file)
                    else:
                        os.remove(dest_file)
                self.do_fs_operation(rm_file)

        self.log_fs_operation('%s: "%s" -> "%s"' %
                              ("Copy" if self.args.copy else "Move",
                               src_file,
                               dest_file))
        def rename_file():
            """ Rename the file."""
            if self.args.copy:
                shutil.copy(src_file, dest_file)
            else:
                shutil.move(src_file, dest_file)
        self.do_fs_operation(rename_file)
        return True

    def do_fs_operation(self, fs_op):
        """ Perform the file operation: nothing on a dry run, immediately
        when there is no queue, otherwise queued for the Consumer."""
        if not self.args.dry_run:
            if not self.fs_op_queue:
                fs_op()
            else:
                self.fs_op_queue.put(fs_op)

    def log_fs_operation(self, log_str):
        """ Log the file operation, if necessary. """
        if self.args.verbose or self.args.dry_run:
            print(log_str)

    def log_fs_problem(self, prob_str):
        """ Log the filesystem problem, if necessary."""
        if self.args.verbose or self.args.dry_run or not self.args.force:
            print(prob_str)

    def get_dest_dir(self, pretty_name):
        """ Given the pretty name, compute the destination directory: the
        output directory itself for bare files, else a sub-directory named
        after the movie."""
        if self.args.bare_file:
            return self.output_dir
        return os.path.join(self.output_dir, pretty_name)

    def process_file(self, filepath):
        """
        Process a given movie file.

        Returns True when the file was handled (or, in guess-only mode, when
        a title was found) and False when it was skipped.
        """
        if not os.path.isfile(filepath):
            print('Not a file: "%s"' % filepath)
            return False
        filename_with_ext = os.path.basename(filepath)
        (filename, ext) = os.path.splitext(filename_with_ext)
        guess = self.get_best_guess(filename)
        pretty_name = self.get_imdb_title_from_guess(guess)
        if self.args.guess_only or not pretty_name:
            return bool(pretty_name)

        for char in "\\/:*?\"<>|":
            # clean disallowed chars from windows filenames
            pretty_name = pretty_name.replace(char, " ")

        dest_dir = self.get_dest_dir(pretty_name)
        dest_file = os.path.join(dest_dir, pretty_name + ext)
        if self.args.keep_going and os.path.isfile(dest_file):
            return True
        if self.args.interactive:
            # only continue if the user says to
            action = "copy" if self.args.copy else "move"
            # written without a trailing newline; the prompt below starts
            # with its own line break
            sys.stdout.write('''
            Will %s "%s"
                   -> "%s".''' % (action, filepath, dest_file))
            cont = raw_input("\nContinue? [Y/n] ")
            if not re.match("^(y(es)?)$|^$", cont, re.IGNORECASE):
                return False
        # take advantage of short-circuiting operators to chain together the
        # next few calls, stop at any failure, and return the final success or
        # failure value
        return (self.create_container(dest_dir) and
                self.transfer_file(filepath, dest_file))

    def run(self):
        """
        Move/copy all the movie files into containing directories in the
        output_dir.
        """
        # process the file args
        total_renamed = 0
        total_skipped = 0
        if self.args.verbose:
            print("Processing files: {}".format(self.files))
        for movie_file in self.files:
            b_success = self.process_file(movie_file)
            if self.args.totals:
                if b_success:
                    total_renamed += 1
                else:
                    total_skipped += 1

        if self.args.totals:
            total_processed = total_renamed + total_skipped
            # plural for every count except exactly one ("0 files", "2 files")
            print("%d file%s processed: %d renamed, %d skipped" %
                  (total_processed,
                   "s" if total_processed != 1 else "",
                   total_renamed,
                   total_skipped))

        self.b_is_working = False

        if self.consumer:
            if self.consumer.is_alive():
                print(
                    "Do NOT exit, filesystem thread is still processing files!")
            # join with a timeout in a loop rather than one blocking join
            while self.consumer.is_alive():
                self.consumer.join(0.5)


    def is_working(self):
        """ Return whether or not the producer is still working. It is
        important that this is read-only, because it is read by the Consumer;
        if the Consumer writes to this variable, this will not longer be
        thread-safe. """
        return self.b_is_working
示例#39
0
class IMDBcon:
    """Set the icon of folders (and optionally files) to IMDB cover images.

    Work happens in a temporary directory: a cover is downloaded, resized,
    pasted onto a transparent square and handed to a generated helper script
    that applies it as the icon of the current path.
    """

    def __init__(self):
        self.parser = Parser()
        self.directory = ''
        self.imdb = Imdb()
        # (width, height) the cover is resized to, and the square canvas it
        # is pasted onto
        self.cover_size = 214, 317
        self.square_size = 317, 317
        # item currently being processed: a dict with 'path', 'title',
        # 'imdb_id', 'imdb_obj' and 'duplicates'
        self.current = MOVIE_DICT
        self.all_files = []
        self.display = Display()

    def update_display(self, process, args=None):
        """Send process to self.display to print to screen"""
        if args:
            process = PROCESSES[process] % args
        else:
            process = PROCESSES[process]
        self.display.update_current_process(process)

    def make_empty_square(self):
        """Make transparent .png image"""
        image = Image.new('RGBA', self.square_size, (0, 0, 0, 0))
        image.save(EMPTY_PNG, 'PNG')

    def make_magic_script(self):
        """Make temporary magic 'set_icon.py' script"""
        with open(MAGIC_SCRIPT, 'w') as script:
            script.write(MAGIC_SCRIPT_STRING)

    def make_temp_files(self):
        """Make temporary files (recreating the temp directory from scratch)"""
        if os.path.isdir(TEMP_DIR):
            shutil.rmtree(TEMP_DIR)
        os.mkdir(TEMP_DIR)
        self.make_empty_square()
        self.make_magic_script()

    def remove_temp_dir(self):
        """Remove temporary directory"""
        self.display.current_title = ''
        self.update_display('clean')
        shutil.rmtree(TEMP_DIR)

    def set_current(self, dict_item=None, path=''):
        """Set self.current 'title' and 'path'

        With dict_item the whole item is replaced; with path only, the
        existing item gets the path and a title taken from the path's
        basename without extension.
        """
        if dict_item:
            self.current = dict_item
        elif path:
            self.current['path'] = path
            self.current['title'] = os.path.splitext(os.path.basename(path))[0]
        self.display.current_title = self.current['title']

    def set_id(self, imdb_id):
        """Record imdb_id for the current item.

        When the current path is a directory the id is also written to a
        '.imdb_id' file inside it.
        """
        if os.path.isdir(self.current['path']):
            id_path = os.path.join(self.current['path'], '.imdb_id')
            with open(id_path, 'w') as id_file:
                id_file.write(imdb_id)
        self.current['imdb_id'] = imdb_id

    def get_current_title(self):
        """Set self.current['imdb_obj'] to the Imdb Title object.

        The id is taken, in order of preference, from self.current['imdb_id'],
        from a '.imdb_id' file under the current path, or from the first
        search result for the current title (which is then saved to that
        file when the path is a directory).

        Returns True when a title with a cover URL was found; otherwise an
        error is appended to self.display.errors_caught and False is returned.
        """
        self.update_display('search')
        id_path = os.path.join(self.current['path'], '.imdb_id')
        # User can use preset imdb_id for full accuracy
        if self.current['imdb_id']:
            try:
                self.current['imdb_obj'] = self.imdb.get_title_by_id(self.current['imdb_id'])
            except HTTPError:
                error = 'Bad IMDB id for "%s" (%s)' % (
                    self.current['title'], self.current['imdb_id'])
                self.display.errors_caught.append(error)
                return False
        elif os.path.isfile(id_path):
            try:
                with open(id_path) as id_file:
                    # strip all whitespace from the stored id
                    self.current['imdb_obj'] = self.imdb.get_title_by_id(
                        ''.join(id_file.read().split()))
            except HTTPError:
                error = 'Bad IMDB id for "%s"' % self.current['title']
                self.display.errors_caught.append(error)
                return False
        else:
            try:
                titles = self.imdb.search_for_title(self.current['title'])
                temp = titles[0]  # Not an Imdb Title object
                self.current['imdb_obj'] = self.imdb.get_title_by_id(temp['imdb_id'])
                if os.path.isdir(self.current['path']):
                    with open(id_path, 'w') as id_file:
                        id_file.write(temp['imdb_id'])
            except IndexError:
                error = 'No Titles Found for "%s"' % self.current['title']
                self.display.errors_caught.append(error)
                return False
        if self.current['imdb_obj'].cover_url:
            return True
        else:
            error = 'No Cover Image Found for "%s"' % self.current['title']
            self.display.errors_caught.append(error)
            return False

    def retrieve_cover(self):
        """Download .jpg cover file from IMDB"""
        url = self.current['imdb_obj'].cover_url
        self.update_display('download', url)
        urlretrieve(url, TEMP_JPG)

    def resize_icon(self):
        """Set .jpg cover to self.cover_size"""
        self.update_display('resize', str(self.cover_size))
        image = Image.open(TEMP_JPG)
        resized = image.resize(self.cover_size, Image.ANTIALIAS)
        resized.save(TEMP_JPG)

    def square_icon(self):
        """Convert .jpg cover to .png squared cover"""
        self.update_display('square')
        background = Image.open(EMPTY_PNG)
        cover = Image.open(TEMP_JPG)
        # NOTE(review): 50 px looks like it roughly centres the 214 px wide
        # cover on the 317 px square -- confirm if the sizes ever change
        offset = (50, 0)
        background.paste(cover, offset)
        background.save(TEMP_PNG)
        os.remove(TEMP_JPG)

    def set_icon_magic(self):
        """Run 'set_icon.py' script"""
        import subprocess

        self.update_display('set_icon')
        # Pass the arguments as a list instead of a shell string so that
        # quotes, '$' or backticks in a path cannot break or inject into the
        # command line.
        subprocess.call(
            ['python2.6', MAGIC_SCRIPT, TEMP_PNG, self.current['path']])

    def set_icon(self):
        """Set directory icon to matching IMDB cover image"""
        if not self.get_current_title():
            return
        self.retrieve_cover()
        self.resize_icon()
        self.square_icon()
        self.set_icon_magic()
        # items with the same title reuse the icon that was just prepared
        for item in self.current['duplicates']:
            self.set_current(dict_item=item)
            self.set_icon_magic()
        os.remove(TEMP_PNG)

    def exit_message(self):
        """Display exit message along with any errors"""
        self.display.update_current_process('')
        print(PROCESSES['complete'])
        if self.display.errors_caught:
            for error in self.display.errors_caught:
                print(error)
        else:
            print('No Errors.')
        print('')

    def is_duplicate(self, item):
        """Return True if an already collected item has the same title.

        In that case item is appended to that item's 'duplicates' list.
        """
        for existing in self.all_files:
            if item['title'] == existing['title']:
                existing['duplicates'].append(item)
                return True
        return False

    def find_all(self):
        """Get list of all subdirectories and their files in directory

        Files are only collected when the parser tag is '-a', and only when
        their extension is in ACCEPTED_EXTENSIONS.
        """
        for root, dirs, files in os.walk(self.directory):
            for directory in dirs:
                item = {
                    'path': os.path.join(root, directory),
                    'title': directory,
                    'imdb_id': None,
                    'imdb_obj': None,
                    'duplicates': []
                }
                if not self.is_duplicate(item):
                    self.all_files.append(item)
            if self.parser.tag != '-a':
                continue
            for filename in files:
                title, ext = os.path.splitext(filename)
                if ext[1:] not in ACCEPTED_EXTENSIONS:
                    continue
                item = {
                    'path': os.path.join(root, filename),
                    'title': title,
                    'imdb_id': None,
                    # same keys as the directory items above
                    'imdb_obj': None,
                    'duplicates': []
                }
                if not self.is_duplicate(item):
                    self.all_files.append(item)
        self.display.total_processes = len(self.all_files)

    def set_icons(self):
        """Set icons for all sub-directories in directory"""
        self.find_all()
        for item in self.all_files:
            self.set_current(dict_item=item)
            self.set_icon()
            self.display.completed_processes += 1

    def run(self):
        """Dispatch on the parsed command-line tag and clean up afterwards.

        '-m' / '-a' process a whole directory (arg1), '-s' a single path
        (arg1), and '-id' a single path (arg2) with a preset IMDB id (arg1).
        """
        if not self.parser.valid:
            return
        tag, arg1, arg2 = self.parser.parsed
        self.make_temp_files()
        print('')
        if tag in ('-m', '-a'):
            self.directory = arg1
            self.set_icons()
        if tag == '-s':
            self.set_current(path=arg1)
            self.set_icon()
        if tag == '-id':
            self.set_current(path=arg2)
            self.set_id(arg1)
            self.set_icon()
        self.remove_temp_dir()
        self.exit_message()
示例#40
0
import csv
from imdbpie import Imdb
import openpyxl
import pandas as pd

imdb = Imdb()
review = []
movie = []
# Raw strings: "\I" is not a valid escape sequence, and the raw form spells
# exactly the same Windows paths.
path = r'H:\IFS\IMDB\test.xlsx'  # Excel sheet containing the name of the movies
path1 = r'H:\IFS\IMDB\test1.xlsx'  # Excel sheet containing the result from the IMDB which contain the user review for each movie
# ``sheet_name`` is the current spelling of the former ``sheetname`` keyword.
df = pd.read_excel(path, sheet_name='Sheet1')
for row in df['Movies']:
    try:
        movie.append(row)
        # first search hit -> its total number of user reviews
        Id = imdb.search_for_title(row)[0]['imdb_id']
        review.append(imdb.get_title_user_reviews(Id)['totalReviews'])
    except IndexError:
        # no search result: keep the two columns the same length
        review.append("INVALID")

df = pd.DataFrame({'Movies': movie, 'Review': review})
# The context manager saves and closes the workbook; ExcelWriter.save() is
# not available in current pandas releases.
with pd.ExcelWriter(path1, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Sheet1')
示例#41
0
class Downloader(object):
    def __init__(self, app_dir, logger):
        """Set up the cache state, the IMDb client and the logger.

        Args:
            app_dir: Object whose ``user_data_dir`` attribute is the directory
                holding the cache file (``CACHING_FILE``).
            logger: Logger stored on the instance; the methods visible here
                only call its ``info``.
        """
        data_dir = app_dir.user_data_dir
        self.app_dir_data = data_dir
        self.cur_cache = {}
        self.cache = {}
        self.cache_file = os.path.join(data_dir, CACHING_FILE)
        if os.path.exists(self.cache_file):
            with open(self.cache_file) as cache_fh:
                # NOTE(review): yaml.load without an explicit Loader can build
                # arbitrary objects; consider yaml.safe_load if this file can
                # ever be written by someone else.
                self.cache = yaml.load(cache_fh)
            # an empty file loads as a falsy value; keep the cache a dict
            if not self.cache:
                self.cache = {}
        self.in_cache = []
        self.imdb = Imdb()
        self.logger = logger

    def download(self, data, full_title):
        """Locate and return the subtitle for the described movie or episode.

        Args:
            data: Dict with ``"title"`` and ``"type"``; episodes also carry
                ``"season"`` and ``"episode"``.
            full_title: Passed on unchanged to ``get_sub_info``.

        Returns:
            The result of ``get_sub_content`` for the first archive member
            whose extension is in ``SUB_EXT``, or ``None`` when no member
            matches.
        """
        cache_key = data["title"]
        self.logger.info("searching subs for " + cache_key)
        if data["type"] == "episode":
            # episodes are cached per "<title>.<season>.<episode>"
            cache_key = ".".join(
                [cache_key, str(data["season"]), str(data["episode"])])
        self.cur_cache = self.cache.get(cache_key, {})
        imdb_id = self.get_imdb_id(data)

        self.save_cache(cache_key)

        sub_json = self.get_sub_info(data, full_title, imdb_id)
        if not sub_json:
            self.sub_not_found_handler(data)

        archive = self.get_sub_zip_file(sub_json)
        for member in archive.namelist():
            extension = os.path.splitext(member)[1]
            if extension in SUB_EXT:
                return self.get_sub_content(member, archive)
        return None

    def get_sub_zip_file(self, sub_json):
        """Download the subtitle archive named by ``sub_json["id"]``.

        The URL is ``URL_ZIP`` + id + ``".zip"``. The response body is wrapped
        in ``StringIO`` when that name is defined, otherwise in ``BytesIO``,
        and opened as a ``zipfile.ZipFile``.
        """
        archive_url = URL_ZIP + sub_json.get("id") + ".zip"
        response = requests.get(archive_url)
        try:
            archive = zipfile.ZipFile(StringIO(response.content))
        except NameError:
            # StringIO is not available under that name; use BytesIO instead
            archive = zipfile.ZipFile(BytesIO(response.content))
        return archive

    def get_sub_content(self, f, z):
        """Read one member of the archive with normalised line endings.

        Args:
            f: Name of the member inside ``z``.
            z: Open archive providing ``read(name)``.

        Returns:
            tuple: ``(content, extension)`` where ``content`` is the member's
            bytes with every ``\\r\\n`` replaced by ``\\n`` and ``extension``
            is the member name's extension including the dot.
        """
        # The data read from the archive is already a byte string, so the
        # replacement is done directly; the former encode() attempt inside a
        # bare ``except:`` always ended up producing this same result.
        ret = z.read(f).replace(b'\r\n', b'\n')
        return ret, os.path.splitext(f)[1]

    def sub_not_found_handler(self, data):
        """Raise ``CantFindSubtitleException`` naming the title in ``data``.

        For episodes the season and episode numbers are appended to the
        title, separated by dots.
        """
        parts = [data["title"]]
        if data["type"] == "episode":
            parts.extend([str(data["season"]), str(data["episode"])])
        raise CantFindSubtitleException(
            "Can't find subtitle id - for this title: " + ".".join(parts))

    def get_sub_info(self, data, full_title, imdb_id):
        """Fetch the wizdom record for ``imdb_id`` and pick a subtitle from it.

        Raises:
            CantFindSubtitleException: When ``get_json_from_wizdom`` returns
                nothing for the id.

        Returns:
            The result of ``get_ep_sub`` for episodes, otherwise of
            ``download_mov_sub``, each given the record's ``"subs"`` entry.
        """
        raw_json = get_json_from_wizdom(imdb_id)
        if not raw_json:
            raise CantFindSubtitleException("wrong imdb id?")

        record = json.loads(raw_json)
        self.logger.info("title from wizdom is: " + record.get("title_en"))

        if data["type"] == "episode":
            picker = self.get_ep_sub
        else:
            picker = self.download_mov_sub
        return picker(data, record.get("subs", {}), full_title)

    def get_imdb_id(self, data):
        imdb_id = self.cur_cache.get("imdb_id", "")
        if not imdb_id:
            imdb_id = self.imdb.search_for_title(data["title"])[0].get("imdb_id")
            self.cur_cache["imdb_id"] = imdb_id
        self.logger.info("imdb_id is: " + imdb_id)
        return imdb_id

    def download_mov_sub(self, data, wizdom_json, full_title):
        """Return ``find_best_sub``'s choice among a movie's subtitles."""
        best_sub = find_best_sub(wizdom_json, data, full_title)
        return best_sub

    def get_ep_sub(self, data, wizdom_json, full_title):
        """Return ``find_best_sub``'s choice for one episode.

        ``wizdom_json`` is looked up by season and then by episode, both as
        strings.

        Raises:
            CantFindSubtitleException: When the season, or the episode inside
                a found season, has no (or an empty) entry.
        """
        season_key = str(data["season"])
        season_json = wizdom_json.get(season_key, {})
        if not season_json:
            raise CantFindSubtitleException("can't find season: " + season_key)

        self.logger.info("found season " + season_key)
        episode_key = str(data["episode"])
        episode_json = season_json.get(episode_key, {})
        if not episode_json:
            raise CantFindSubtitleException("can't find episode: " + episode_key)

        return find_best_sub(episode_json, data, full_title)

    def close(self, specific):
        """Persist the cache (unless ``specific``) and clear the data dir.

        When ``specific`` is falsy the cache is pruned with ``clear_cache``
        and dumped as YAML to ``self.cache_file``. ``clear_data_dir`` is
        called on the data directory in every case.
        """
        persist_cache = not specific
        if persist_cache:
            self.clear_cache()
            with open(self.cache_file, "w") as cache_fh:
                yaml.dump(self.cache, cache_fh)
        clear_data_dir(self.app_dir_data)

    def clear_cache(self):
        for k, v in self.cache.items():
            try:
                if k not in self.in_cache and v["time_stamp"] < datetime.datetime.now() - datetime.timedelta(
                        CACHE_DAYS):
                    self.cache.pop(k)
            except KeyError:
                self.cache.pop(k)

    def save_cache(self, key):
        # Mark ``key`` as used and store the current entry under it.
        self.in_cache.append(key)
        # de-duplicate the used keys (going through a set does not keep order)
        self.in_cache = list(set(self.in_cache))
        # stamp the entry with the current time; clear_cache compares against it
        self.cur_cache["time_stamp"] = datetime.datetime.now()
        self.cache[key] = self.cur_cache