Exemplo n.º 1
0
 def rating(self):
     """
     Return the ratings table joined with the titles table, loading it on first use.

     The result is cached in ``self._rating``; later calls return the cached
     object without touching the disk or the network helper again.

     NOTE(review): ``self.title`` is used as an attribute here, so ``title``
     (and presumably this accessor too) is expected to be a property whose
     decorator is outside the visible code - confirm.

     :return: the cached ratings SFrame joined with ``self.title``
     """
     if self._rating is not None:
         return self._rating

     ratings_path = f"{OUTPUT_PATH}/title.ratings.tsv.gz"
     download_file(IMDB_RATING_URL, ratings_path, False)
     # The "\N" marker in the TSV is read as a missing value.
     self._rating = SFrame.read_csv(
         ratings_path,
         delimiter="\t",
         na_values=["\\N"],
         verbose=self._verbose,
     )
     # No explicit join key is given, so SFrame.join picks the key itself
     # (presumably the shared column names - confirm against the SFrame docs).
     self._rating = self._rating.join(self.title)
     return self._rating
Exemplo n.º 2
0
 def crew(self):
     """
     Return the crew table with one row per (title, director), loading it on first use.

     On first access the crew TSV is fetched through ``download_file``, read
     into an SFrame, its comma-separated ``directors`` column is split into a
     list and then stacked so each director gets its own row. The finished
     table is cached in ``self._crew``.

     The table is built in a local variable and cached only once it is
     complete. The previous version read ``self.crew`` from inside this very
     accessor, which only worked because a half-built table had already been
     stored in ``self._crew``.

     :return: the cached crew SFrame, stacked on the ``directors`` column
     """
     if self._crew is None:
         crew_path = f"{OUTPUT_PATH}/title.crew.tsv.gz"
         download_file(IMDB_CREW_URL, crew_path, False)
         # The "\N" marker in the TSV is read as a missing value.
         crew_table = SFrame.read_csv(
             crew_path,
             delimiter="\t",
             na_values=["\\N"],
             verbose=self._verbose,
         )
         # "a,b,c" -> ["a", "b", "c"] so the column can be stacked below.
         crew_table["directors"] = crew_table["directors"].apply(lambda c: c.split(","))
         # Unpack the list column in place: one output row per director.
         self._crew = crew_table.stack("directors", "directors")
     return self._crew
Exemplo n.º 3
0
 def actors_movies(self):
     """
     Return the acting credits for movies, enriched with title and actor data.

     On first access the principals TSV is fetched through ``download_file``
     and read into an SFrame. Rows whose ``category`` is "actor" or "actress"
     are kept, reduced to the ``tconst``, ``nconst`` and ``characters``
     columns, then joined with the titles whose ``titleType`` is "movie" and
     with ``self.all_actors``. The result is cached in ``self._actors_movies``.

     NOTE(review): ``self.title`` and ``self.all_actors`` are used as
     attributes, so they are expected to be properties - confirm.

     :return: the cached SFrame of movie acting credits
     """
     if self._actors_movies is not None:
         return self._actors_movies

     principals_path = f"{OUTPUT_PATH}/title.principals.tsv.gz"
     download_file(IMDB_PRINCIPALS_URL, principals_path, False)
     # The "\N" marker in the TSV is read as a missing value.
     self._actors_movies = SFrame.read_csv(
         principals_path,
         delimiter="\t",
         na_values=["\\N"],
         verbose=self._verbose,
     )

     # Keep acting credits only, and just the columns used further on.
     acting_credits = self._actors_movies.filter_by(["actor", "actress"], "category")
     self._actors_movies = acting_credits["tconst", "nconst", "characters"]

     # Restrict to movies by joining against the filtered titles table.
     titles = self.title
     movie_titles = titles[titles["titleType"] == "movie"]
     self._actors_movies = self._actors_movies.join(movie_titles)

     # Attach the per-person columns of the actors table.
     self._actors_movies = self._actors_movies.join(self.all_actors)
     return self._actors_movies
Exemplo n.º 4
0
 def all_actors(self):
     """
     Return the people whose primary profession is actor or actress, with a gender column.

     On first access the names TSV is fetched through ``download_file`` and
     read into an SFrame. The comma-separated ``primaryProfession`` column is
     split and stacked (one row per profession), rows other than "actor" /
     "actress" are dropped, and a ``gender`` column is filled by calling
     ``self.add_actor_gender`` on every row. The result is cached in
     ``self._all_actors``.

     :return: the cached SFrame of actors and actresses
     """
     if self._all_actors is not None:
         return self._all_actors

     names_path = f"{OUTPUT_PATH}/name.basics.tsv.gz"
     download_file(IMDB_NAMES_URL, names_path, False)
     # The "\N" marker in the TSV is read as a missing value.
     self._all_actors = SFrame.read_csv(
         names_path,
         delimiter="\t",
         na_values=["\\N"],
         verbose=self._verbose,
     )

     # "actor,producer" -> ["actor", "producer"], then one row per profession.
     professions = self._all_actors["primaryProfession"]
     self._all_actors["primaryProfession"] = professions.apply(lambda joined: joined.split(","))
     self._all_actors = self._all_actors.stack("primaryProfession", "primaryProfession")

     self._all_actors = self._all_actors.filter_by(["actor", "actress"], "primaryProfession")
     # Row-wise apply: each row is handed to add_actor_gender.
     self._all_actors["gender"] = self._all_actors.apply(
         lambda person: self.add_actor_gender(person)
     )
     return self._all_actors
Exemplo n.º 5
0
    def popular_actors(self):
        """
        Return per-actor average movie rating and credit count, best rated first.

        On first access:

        1. the principals TSV is fetched and reduced to the ``tconst`` /
           ``nconst`` columns of rows whose ``category`` is "actor" or
           "actress";
        2. those credits are joined with the rows of ``self.rating`` whose
           ``titleType`` is "movie" and whose ``numVotes`` exceeds 1000;
        3. the result is grouped by ``nconst`` into ``averageRating`` (mean)
           and ``count`` (number of rows);
        4. the names table is joined in and a ``gender`` column is filled by
           ``self.add_actor_gender``;
        5. the table is sorted by ``averageRating`` in descending order and
           cached in ``self._actors``.

        Changes compared with the previous version:

        * the names file is now requested through ``download_file`` before it
          is read, so this accessor no longer depends on another accessor
          having fetched it first;
        * the sort is applied last, so the order the caller receives does not
          depend on whether the join keeps row order;
        * the table is cached only once it is complete.

        NOTE(review): ``self.rating`` is used as an attribute, so it is
        expected to be a property - confirm.

        :return: the cached SFrame of actors with rating statistics
        """
        if self._actors is None:
            principals_path = f"{OUTPUT_PATH}/title.principals.tsv.gz"
            names_path = f"{OUTPUT_PATH}/name.basics.tsv.gz"
            download_file(IMDB_PRINCIPALS_URL, principals_path, False)
            # Previously missing: the names file was read without being requested.
            download_file(IMDB_NAMES_URL, names_path, False)

            # The "\N" marker in the TSV is read as a missing value.
            actors = SFrame.read_csv(principals_path, delimiter="\t", na_values=["\\N"],
                                     verbose=self._verbose)
            actors = actors.filter_by(["actor", "actress"], "category")["tconst", "nconst"]

            # Only movies with more than 1000 votes take part in the average.
            ratings = self.rating
            actors = actors.join(
                ratings[(ratings["titleType"] == "movie") & (ratings["numVotes"] > 1000)])
            actors = actors.groupby("nconst", operations={'averageRating': agg.AVG("averageRating"),
                                                          'count': agg.COUNT()})

            # NOTE(review): unlike the other readers, no na_values is passed here;
            # kept as-is so the column types of the names table do not change.
            names = SFrame.read_csv(names_path, delimiter="\t", verbose=self._verbose)
            actors = actors.join(names)
            actors["gender"] = actors.apply(lambda p: self.add_actor_gender(p))

            # Sort after the join so the final ordering is the one returned.
            self._actors = actors.sort("averageRating", ascending=False)

        return self._actors
    def __init__(self,
                 subtitle_info_dict,
                 use_top_k_roles=None,
                 ignore_roles_names=None):
        """
        Construct the SubtitleAnalyzer and create the video's roles timeline
        from the subtitle file referenced by ``subtitle_info_dict``.

        :param subtitle_info_dict: dict with the video metadata created by the SubtitleFetcher class;
            its ROLES_PATH and SUBTITLE_PATH entries are rewritten in place when the subtitle file
            is not found at the stored location
        :param use_top_k_roles: use only the top K roles when constructing the movie? (None - to use all roles)
        :param ignore_roles_names: list of roles name to ignore (None is replaced by an empty set)

        """
        if ignore_roles_names is None:
            ignore_roles_names = set()

        self._interactions = {}
        # Unknown keys get a fresh record with no role and zeroed first/last fields.
        self._roles = defaultdict(lambda: {"role": None, "first": 0, "last": 0})

        download_file(STANFORD_NLP_JAR_URL, STANFORD_NLP_JAR, False)
        download_file(STANFORD_NLP_MODEL_URL, STANFORD_NLP_MODEL, False)

        subtitle_missing = not os.path.exists(subtitle_info_dict[SUBTITLE_PATH])
        if subtitle_missing:
            # Re-root both stored paths under OUTPUT_PATH, keeping the segment
            # that follows the first "temp" in the stored path.
            # NOTE(review): raises IndexError when a stored path has no "temp" - confirm intended.
            for path_key in (ROLES_PATH, SUBTITLE_PATH):
                stored_path = subtitle_info_dict[path_key]
                subtitle_info_dict[path_key] = OUTPUT_PATH + stored_path.split("temp")[1]

        # Drop the 't' characters found at either end of the stored id.
        stripped_imdb_id = subtitle_info_dict[IMDB_ID].strip('t')
        roles_path = subtitle_info_dict[ROLES_PATH]
        self._video_role_analyzer = VideoRolesAnalyzer(
            stripped_imdb_id, use_top_k_roles, ignore_roles_names, roles_path)

        srt_path = subtitle_info_dict[SUBTITLE_PATH]
        self._subs_entities_timeline_dict = self.create_video_roles_timeline(srt_path)
Exemplo n.º 7
0
 def title(self):
     """
     Return the titles table, loading it on first use.

     On first access the titles TSV is fetched through ``download_file`` and
     read into a tab-separated SFrame, which is then cached in
     ``self._title``; later calls return the cached object.

     :return: the cached titles SFrame
     """
     if self._title is not None:
         return self._title

     titles_path = f"{OUTPUT_PATH}/title.basics.tsv.gz"
     download_file(IMDB_TITLES_URL, titles_path, False)
     # The "\N" marker in the TSV is read as a missing value.
     self._title = SFrame.read_csv(
         titles_path,
         delimiter="\t",
         na_values=["\\N"],
         verbose=self._verbose,
     )
     return self._title