class BuildGenreCountry(object):
    """Build a graph whose nodes are countries linked to genres.

    For every title, each non-empty country becomes a node carrying one link
    per genre listed for that title. Nodes are handed to
    ``Graph.add_node_withid_and_merge_links``; presumably that call merges
    the links of a country seen more than once (TODO confirm in ``Graph``).
    """

    def __init__(self, titles):
        """Keep ``titles`` and create the graph and the genre registry.

        Args:
            titles: Re-iterable collection of title records exposing
                ``listed_in`` and ``country`` as ", "-separated strings.
        """
        self.titles = titles
        self.graph = Graph("Graph for relationship among countries and genres")
        # Registry of the genres seen so far: sequential integer -> genre.
        self.distinct_genre = {}
        # Next integer key to use in ``distinct_genre``.
        self.genre_count = 1

    def build_graph(self):
        """Populate ``self.graph`` with one node per country of each title."""
        for title in self.titles:
            # One link per genre of the title; the same list is attached to
            # every country node created for this title.
            links = []
            for genre in title.listed_in.split(", "):
                genre_node = Node(genre, Constants.GENRE_PREFIX_LABEL)
                genre_node.id = self._get_genre_id(genre)
                links.append(Link(genre_node))

            # Create nodes (countries).
            for country in title.country.split(", "):
                if not country:
                    continue
                # Drop a dangling comma left over after the ", " split.
                if country.endswith(','):
                    country = country[:-1]
                new_node = Node(country, Constants.COUNTRY_PREFIX_LABEL)
                new_node.add_link(links)
                self.graph.add_node_withid_and_merge_links(new_node)

    def most_common_genre_by_country(self, country_id):
        """Return the link with the highest ``weight`` for a country node.

        Args:
            country_id: Id of the country node to look up in the graph.

        Returns:
            The heaviest link of the matching node, or ``None`` when no node
            has that id or the node has no links.
        """
        country_node = next(
            (node for node in self.graph.nodes if node.id == country_id), None)
        if not country_node:
            return None
        # max() raises ValueError on an empty iterable, so guard explicitly.
        links = list(country_node.links)
        if not links:
            return None
        return max(links, key=attrgetter('weight'))

    def _get_genre_id(self, genre):
        """Register ``genre`` if it is new and return its prefixed id.

        The id is ``Constants.GENRE_PREFIX_LABEL`` followed by the genre
        name. A membership test is used instead of an empty-string sentinel
        so that an empty genre name is registered only once.
        """
        if genre not in self.distinct_genre.values():
            self.distinct_genre[self.genre_count] = genre
            self.genre_count += 1
        return '{0}{1}'.format(Constants.GENRE_PREFIX_LABEL, genre)
class BuildTitleSimilarityGraph(object):
    """Build a graph linking titles whose descriptions share words.

    Each title becomes a node. It gets a link to every other title whose
    description has a Jaccard similarity of at least
    ``Constants.MIN_SIMILARITY_COEFFICIENT``; the link weight is that
    similarity expressed as an integer percentage.
    """

    def __init__(self, titles):
        """Keep ``titles`` and create the graph.

        Args:
            titles: Re-iterable collection of title records exposing
                ``show_id``, ``title`` and ``description``. It is iterated
                in a nested loop, so a one-shot iterator will not work.
        """
        self.titles = titles
        self.graph = Graph("Graph for relationship titles based on description text")

    def build_graph(self):
        """Populate ``self.graph`` with one node per title and its links."""
        for title in self.titles:
            links = []
            title_node = Node(
                title.show_id,
                Constants.TITLE_PREFIX_LABEL,
                '{0}{1}'.format(Constants.TITLE_PREFIX_LABEL, title.title))

            for link_title in self.titles:
                # A title is never linked to itself.
                if link_title.show_id == title.show_id:
                    continue

                similarity_coeff = BuildTitleSimilarityGraph._jaccard_similarity_text(
                    title.description, link_title.description)
                if Constants.MIN_SIMILARITY_COEFFICIENT <= similarity_coeff:
                    # Store the coefficient as an integer percentage.
                    weight = int(similarity_coeff * 100)
                    link_node = Node(
                        link_title.show_id,
                        Constants.TITLE_PREFIX_LABEL,
                        '{0}{1}'.format(Constants.TITLE_PREFIX_LABEL, link_title.title))
                    links.append(Link(link_node, weight))

            title_node.add_link(links)
            self.graph.add_node(title_node)

    def get_most_similar(self, title_id):
        """Return the link with the highest ``weight`` for a title node.

        Args:
            title_id: Id of the title node to look up in the graph.

        Returns:
            The heaviest link of the matching node, or ``None`` when no node
            has that id or the node has no links (no other title reached the
            similarity threshold).
        """
        title_node = next(
            (node for node in self.graph.nodes if node.id == title_id), None)
        if not title_node:
            return None
        # max() raises ValueError on an empty iterable, so guard explicitly.
        links = list(title_node.links)
        if not links:
            return None
        return max(links, key=attrgetter('weight'))

    @staticmethod
    def _jaccard_similarity_text(str1, str2):
        """Return the Jaccard similarity of the word sets of two strings.

        Words are obtained by lower-casing and splitting on whitespace. The
        result is ``|A & B| / |A | B|`` as a float in ``[0.0, 1.0]``. When
        neither string contains a word the union is empty and ``0.0`` is
        returned instead of dividing by zero.
        """
        words1 = set(str1.lower().split())
        words2 = set(str2.lower().split())
        union_size = len(words1 | words2)
        if union_size == 0:
            return 0.0
        return float(len(words1 & words2)) / union_size
class BuildActorGenreGraph(object):
    """Build a graph whose nodes are actors linked to genres.

    For every title, each non-empty cast member becomes a node carrying one
    link per genre listed for that title. Nodes are handed to
    ``Graph.add_node_withid_and_merge_links``; presumably that call merges
    the links of an actor seen more than once (TODO confirm in ``Graph``).
    """

    def __init__(self, titles):
        """Keep ``titles`` and create the graph and the genre registry.

        Args:
            titles: Iterable of title records exposing ``listed_in`` and
                ``cast`` as ", "-separated strings.
        """
        self.titles = titles
        self.graph = Graph("Graph for relationship among actors and genres")
        # Registry of the genres seen so far: sequential integer -> genre.
        self.distinct_genre = {}
        # Next integer key to use in ``distinct_genre``.
        self.genre_count = 1

    def build_graph(self):
        """Populate ``self.graph`` with one node per actor of each title."""
        for title in self.titles:
            # One link per genre of the title; the same list is attached to
            # every actor node created for this title.
            links = []
            for genre in title.listed_in.split(", "):
                genre_node = Node(genre, Constants.GENRE_PREFIX_LABEL)
                genre_node.id = self._get_genre_id(genre)
                links.append(Link(genre_node))

            # Create nodes (actors).
            for actor in title.cast.split(", "):
                if not actor:
                    continue
                # Drop a dangling comma left over after the ", " split.
                if actor.endswith(','):
                    actor = actor[:-1]
                new_node = Node(actor, Constants.ACTOR_PREFIX_LABEL)
                new_node.add_link(links)
                self.graph.add_node_withid_and_merge_links(new_node)

    def _get_genre_id(self, genre):
        """Register ``genre`` if it is new and return its prefixed id.

        The id is ``Constants.GENRE_PREFIX_LABEL`` followed by the genre
        name. A membership test is used instead of an empty-string sentinel
        so that an empty genre name is registered only once.
        """
        if genre not in self.distinct_genre.values():
            self.distinct_genre[self.genre_count] = genre
            self.genre_count += 1
        return '{0}{1}'.format(Constants.GENRE_PREFIX_LABEL, genre)
class BuildGenreYearGraph(object):
    """Build a graph whose nodes are release years linked to genres.

    Every title contributes a node for its ``release_year`` carrying one
    link per genre listed for that title. Nodes are handed to
    ``Graph.add_node_withid_and_merge_links``; presumably that call merges
    the links of a year seen more than once (TODO confirm in ``Graph``).
    """

    def __init__(self, titles):
        """Keep ``titles`` and create the graph and the genre registry.

        Args:
            titles: Iterable of title records exposing ``listed_in`` (a
                ", "-separated string) and ``release_year``.
        """
        self.titles = titles
        self.graph = Graph("Graph for relationship among genres and years")
        # Registry of the genres seen so far: sequential integer -> genre.
        self.distinct_genre = {}
        # Next integer key to use in ``distinct_genre``.
        self.genre_count = 1

    def build_graph(self):
        """Populate ``self.graph`` with one year node per title."""
        for title in self.titles:
            # Building the links: one per genre of the title.
            links = []
            for genre in title.listed_in.split(", "):
                genre_node = Node(genre, Constants.GENRE_PREFIX_LABEL)
                genre_node.id = self._get_genre_id(genre)
                links.append(Link(genre_node))

            year_node = Node(title.release_year, Constants.YEAR_PREFIX_LABEL)
            year_node.add_link(links)
            self.graph.add_node_withid_and_merge_links(year_node)

    def _get_genre_id(self, genre):
        """Register ``genre`` if it is new and return its prefixed id.

        The id is ``Constants.GENRE_PREFIX_LABEL`` followed by the genre
        name. A membership test is used instead of an empty-string sentinel
        so that an empty genre name is registered only once.
        """
        if genre not in self.distinct_genre.values():
            self.distinct_genre[self.genre_count] = genre
            self.genre_count += 1
        return '{0}{1}'.format(Constants.GENRE_PREFIX_LABEL, genre)
def __init__(self, titles):
    """Store ``titles`` and create the country/genre graph and genre registry.

    NOTE(review): no enclosing class header is visible next to this
    definition; confirm which class it is meant to belong to.
    """
    graph_description = "Graph for relationship among countries and genres"
    self.titles = titles
    self.graph = Graph(graph_description)
    # Genre bookkeeping starts out empty, with the counter at 1.
    self.distinct_genre = {}
    self.genre_count = 1
def __init__(self, titles):
    """Store ``titles`` and create the description-similarity graph.

    NOTE(review): no enclosing class header is visible next to this
    definition; confirm which class it is meant to belong to.
    """
    graph_description = "Graph for relationship titles based on description text"
    self.titles = titles
    self.graph = Graph(graph_description)