class BuildGenreCountry(object):
    """Build a graph whose nodes are countries, linked to the genres of their titles.

    Each title contributes one link per genre found in ``title.listed_in`` and
    one node per country found in ``title.country`` (both are split on
    ``", "``).
    """

    def __init__(self, titles):
        """Store the titles and create the graph object.

        Args:
            titles: re-iterable collection of title records; ``build_graph``
                reads the ``listed_in`` and ``country`` attributes of each.
        """
        self.titles = titles
        self.graph = Graph("Graph for relationship among countries and genres")
        # Registry of genres seen so far: running number -> genre name.
        self.distinct_genre = {}
        # Next free key for ``distinct_genre``.
        self.genre_count = 1

    def build_graph(self):
        """Populate ``self.graph`` with one country node per title/country pair."""
        for title in self.titles:
            # One link per genre of this title.
            links = []
            for genre in title.listed_in.split(", "):
                node_link = Node(genre, Constants.GENRE_PREFIX_LABEL)
                node_link.id = self._get_genre_id(genre)
                links.append(Link(node_link))

            # Create nodes (countries).
            # NOTE(review): the same ``links`` list is handed to every country
            # node of this title -- confirm Node/Graph do not mutate it in place.
            for country in title.country.split(", "):
                if country.endswith(','):
                    country = country[:-1]
                # Check emptiness *after* stripping so that a bare "," does
                # not produce a node with an empty name.
                if not country:
                    continue

                new_node = Node(country, Constants.COUNTRY_PREFIX_LABEL)
                new_node.add_link(links)
                self.graph.add_node_withid_and_merge_links(new_node)

    def most_common_genre_by_country(self, country_id):
        """Return the highest-weight link of the node whose id is ``country_id``.

        Args:
            country_id: id of the country node to look up in ``self.graph.nodes``.

        Returns:
            The link with the largest ``weight`` attribute, or ``None`` when no
            node has that id or the node has no links.
        """
        country_node = next(
            (e for e in self.graph.nodes if e.id == country_id), None)
        if not country_node:
            return None

        links = country_node.links
        if not links:
            # max() raises ValueError on an empty sequence.
            return None
        return max(links, key=attrgetter('weight'))

    def _get_genre_id(self, genre):
        """Register ``genre`` on first sight and return its prefixed node id.

        Membership is tested directly instead of through an empty-string
        sentinel, so an empty genre name is registered only once.

        Args:
            genre: genre name as it appears in ``title.listed_in``.

        Returns:
            ``Constants.GENRE_PREFIX_LABEL`` followed by ``genre``.
        """
        if genre not in self.distinct_genre.values():
            self.distinct_genre[self.genre_count] = genre
            self.genre_count += 1

        return '{0}{1}'.format(Constants.GENRE_PREFIX_LABEL, genre)
示例#2
0
class BuildTitleSimilarityGraph(object):
    """Build a graph that links titles whose description texts are similar.

    Similarity is the Jaccard index of the lower-cased, whitespace-separated
    word sets of two descriptions.
    """

    def __init__(self, titles):
        """Store the titles and create the graph object.

        Args:
            titles: re-iterable collection of title records; ``build_graph``
                reads ``show_id``, ``title`` and ``description`` of each.
        """
        self.titles = titles
        self.graph = Graph("Graph for relationship titles based on description text")

    def build_graph(self):
        """Add one node per title, linked to every sufficiently similar other title.

        A link is created when the similarity reaches
        ``Constants.MIN_SIMILARITY_COEFFICIENT``; its weight is the similarity
        expressed as a truncated integer percentage.
        """
        for title in self.titles:
            links = []
            title_node = Node(
                title.show_id,
                Constants.TITLE_PREFIX_LABEL,
                '{0}{1}'.format(Constants.TITLE_PREFIX_LABEL, title.title))

            for link_title in self.titles:
                # A title is never linked to itself.
                if link_title.show_id == title.show_id:
                    continue

                similarity_coeff = BuildTitleSimilarityGraph._jaccard_similarity_text(
                    title.description, link_title.description)

                if Constants.MIN_SIMILARITY_COEFFICIENT <= similarity_coeff:
                    weight = int(similarity_coeff * 100)
                    link_node = Node(
                        link_title.show_id,
                        Constants.TITLE_PREFIX_LABEL,
                        '{0}{1}'.format(Constants.TITLE_PREFIX_LABEL, link_title.title))
                    links.append(Link(link_node, weight))

            title_node.add_link(links)
            self.graph.add_node(title_node)

    def get_most_similar(self, title_id):
        """Return the highest-weight link of the node whose id is ``title_id``.

        Args:
            title_id: id of the title node to look up in ``self.graph.nodes``.

        Returns:
            The link with the largest ``weight`` attribute, or ``None`` when no
            node has that id or the node has no links (no other title reached
            the similarity threshold).
        """
        node = next((e for e in self.graph.nodes if e.id == title_id), None)
        if not node:
            return None

        links = node.links
        if not links:
            # max() raises ValueError on an empty sequence.
            return None
        return max(links, key=attrgetter('weight'))

    @staticmethod
    def _jaccard_similarity_text(str1, str2):
        """Return the Jaccard index of the word sets of two texts.

        Both texts are lower-cased and split on whitespace.

        Args:
            str1: first text.
            str2: second text.

        Returns:
            ``|A & B| / |A | B|`` as a float in ``[0.0, 1.0]``; ``0.0`` when
            neither text contains a word (avoids a division by zero).
        """
        a = set(str1.lower().split())
        b = set(str2.lower().split())

        union_size = len(a | b)
        if union_size == 0:
            return 0.0
        return float(len(a & b)) / union_size
class BuildActorGenreGraph(object):
    """Build a graph whose nodes are actors, linked to the genres of their titles.

    Each title contributes one link per genre found in ``title.listed_in`` and
    one node per actor found in ``title.cast`` (both are split on ``", "``).
    """

    def __init__(self, titles):
        """Store the titles and create the graph object.

        Args:
            titles: iterable of title records; ``build_graph`` reads the
                ``listed_in`` and ``cast`` attributes of each.
        """
        self.titles = titles
        self.graph = Graph("Graph for relationship among actors and genres")
        # Registry of genres seen so far: running number -> genre name.
        self.distinct_genre = {}
        # Next free key for ``distinct_genre``.
        self.genre_count = 1

    def build_graph(self):
        """Populate ``self.graph`` with one actor node per title/actor pair."""
        for title in self.titles:
            # One link per genre of this title.
            links = []
            for genre in title.listed_in.split(", "):
                node_link = Node(genre, Constants.GENRE_PREFIX_LABEL)
                node_link.id = self._get_genre_id(genre)
                links.append(Link(node_link))

            # Create nodes (actors).
            # NOTE(review): the same ``links`` list is handed to every actor
            # node of this title -- confirm Node/Graph do not mutate it in place.
            for actor in title.cast.split(", "):
                if actor.endswith(','):
                    actor = actor[:-1]
                # Check emptiness *after* stripping so that a bare "," does
                # not produce a node with an empty name.
                if not actor:
                    continue

                new_node = Node(actor, Constants.ACTOR_PREFIX_LABEL)
                new_node.add_link(links)
                self.graph.add_node_withid_and_merge_links(new_node)

    def _get_genre_id(self, genre):
        """Register ``genre`` on first sight and return its prefixed node id.

        Membership is tested directly instead of through an empty-string
        sentinel, so an empty genre name is registered only once.

        Args:
            genre: genre name as it appears in ``title.listed_in``.

        Returns:
            ``Constants.GENRE_PREFIX_LABEL`` followed by ``genre``.
        """
        if genre not in self.distinct_genre.values():
            self.distinct_genre[self.genre_count] = genre
            self.genre_count += 1

        return '{0}{1}'.format(Constants.GENRE_PREFIX_LABEL, genre)
示例#4
0
class BuildGenreYearGraph(object):
    """Build a graph whose nodes are release years, linked to the genres of their titles.

    Each title contributes one link per genre found in ``title.listed_in``
    (split on ``", "``) and one node for ``title.release_year``.
    """

    def __init__(self, titles):
        """Store the titles and create the graph object.

        Args:
            titles: iterable of title records; ``build_graph`` reads the
                ``listed_in`` and ``release_year`` attributes of each.
        """
        self.titles = titles
        self.graph = Graph("Graph for relationship among genres and years")
        # Registry of genres seen so far: running number -> genre name.
        self.distinct_genre = {}
        # Next free key for ``distinct_genre``.
        self.genre_count = 1

    def build_graph(self):
        """Populate ``self.graph`` with one year node per title."""
        for title in self.titles:
            # Building the links: one per genre of this title.
            links = []
            for genre in title.listed_in.split(", "):
                node_link = Node(genre, Constants.GENRE_PREFIX_LABEL)
                node_link.id = self._get_genre_id(genre)
                links.append(Link(node_link))

            year_node = Node(title.release_year, Constants.YEAR_PREFIX_LABEL)
            year_node.add_link(links)
            self.graph.add_node_withid_and_merge_links(year_node)

    def _get_genre_id(self, genre):
        """Register ``genre`` on first sight and return its prefixed node id.

        Membership is tested directly instead of through an empty-string
        sentinel, so an empty genre name is registered only once.

        Args:
            genre: genre name as it appears in ``title.listed_in``.

        Returns:
            ``Constants.GENRE_PREFIX_LABEL`` followed by ``genre``.
        """
        if genre not in self.distinct_genre.values():
            self.distinct_genre[self.genre_count] = genre
            self.genre_count += 1

        return '{0}{1}'.format(Constants.GENRE_PREFIX_LABEL, genre)
 def __init__(self, titles):
     """Keep the given titles and initialise the graph and genre bookkeeping attributes."""
     self.titles = titles
     graph_description = "Graph for relationship among countries and genres"
     self.graph = Graph(graph_description)
     # Starts with no registered genres; the counter begins at 1.
     self.distinct_genre, self.genre_count = {}, 1
示例#6
0
 def __init__(self, titles):
     """Keep the given titles and create the graph object for this builder."""
     self.titles = titles
     self.graph = Graph("Graph for relationship titles based on description text")