def Persons(self):
    """Collect the directors and cast members found on the parsed page.

    Directors are the ``<a>`` links under the grandparent of the text node
    matching ``Directors?:``. Actors are read from the rows of the
    ``cast_list`` table.

    Returns:
        A list of dicts with the keys ``"Name"``, ``"Type"`` (``"Director"``
        or ``"Actor"``) and ``"Role"``. ``"Role"`` is ``""`` for directors
        and for actors whose row has no ``character`` cell.
    """
    personlist = []

    # Search the page once and reuse the hit instead of repeating the lookup.
    director_label = self.imdbpage.find(text=re.compile(r"Directors?:"))
    if director_label:
        for link in director_label.parent.parent.findAll("a"):
            # Links whose text contains "more credit" are not people.
            if "more credit" not in link.text:
                personlist.append(
                    {"Name": link.text, "Type": "Director", "Role": ""}
                )

    cast_table = self.imdbpage.find("table", "cast_list")
    if cast_table:
        # Compiled once for all rows; raw string avoids an invalid "\s" escape.
        whitespace = re.compile(r"\s+")

        def cell_text(row, cell_class):
            # str() of a missing cell (None) is the literal "None", which the
            # loop below uses as its "cell absent" marker.
            markup = str(row.find("td", cell_class))
            text = tools.decode_htmlentities(
                tools.remove_html_tags(markup).strip()
            ).replace("\n", " ")
            return whitespace.sub(" ", text)

        for row in cast_table.findAll("tr"):
            name = cell_text(row, "name")
            if name == "None":
                # Row without a name cell: nothing to record.
                continue
            role = cell_text(row, "character")
            if role == "None":
                role = ""
            personlist.append({"Name": name, "Type": "Actor", "Role": role})

    return personlist
def Persons(self):
    """Collect the directors and cast members found on the parsed page.

    Directors are the ``<a>`` links under the grandparent of the text node
    matching ``Directors?:``. Actors are read from the rows of the
    ``cast_list`` table.

    Returns:
        A list of dicts with the keys ``"Name"``, ``"Type"`` (``"Director"``
        or ``"Actor"``) and ``"Role"``. ``"Role"`` is ``""`` for directors
        and for actors whose row has no ``character`` cell.
    """
    personlist = []

    # Search the page once and reuse the hit instead of repeating the lookup.
    director_label = self.imdbpage.find(text=re.compile(r"Directors?:"))
    if director_label:
        for link in director_label.parent.parent.findAll("a"):
            # Links whose text contains "more credit" are not people.
            if "more credit" not in link.text:
                personlist.append(
                    {"Name": link.text, "Type": "Director", "Role": ""}
                )

    cast_table = self.imdbpage.find("table", "cast_list")
    if cast_table:
        # Compiled once for all rows; raw string avoids an invalid "\s" escape.
        whitespace = re.compile(r"\s+")

        def cell_text(row, cell_class):
            # str() of a missing cell (None) is the literal "None", which the
            # loop below uses as its "cell absent" marker.
            markup = str(row.find("td", cell_class))
            text = tools.decode_htmlentities(
                tools.remove_html_tags(markup).strip()
            ).replace("\n", " ")
            return whitespace.sub(" ", text)

        for row in cast_table.findAll("tr"):
            name = cell_text(row, "name")
            if name == "None":
                # Row without a name cell: nothing to record.
                continue
            role = cell_text(row, "character")
            if role == "None":
                role = ""
            personlist.append({"Name": name, "Type": "Actor", "Role": role})

    return personlist
def Description(self):
    """Return the summary text held in the page's ``overview-top`` element.

    The summary is read from the second ``<p>`` of that element, and only
    when the element contains exactly two ``<p>`` tags.

    Returns:
        The cleaned summary with newlines replaced by spaces, or ``""`` when
        the element is missing or does not contain exactly two ``<p>`` tags.
    """
    page = self.imdbpage.find(id="overview-top")
    if page is None:
        # No overview element on the page: nothing to extract.
        return ""

    paragraphs = page.findAll("p")
    if len(paragraphs) != 2:
        # This case used to reach `return summary` with the name unbound
        # and raise UnboundLocalError.
        return ""

    summary = str(paragraphs[1])
    # NOTE(review): the pattern is greedy, so with several links everything
    # between the first "<a" and the last "/a>" is dropped — confirm intent.
    summary = re.sub(r"\<a.*\/a\>", "", summary)
    summary = tools.remove_html_tags(summary)
    summary = summary.replace("»", "")
    # Decode only byte strings, so text that is already unicode is passed
    # through instead of failing on a missing/implicit decode.
    if isinstance(summary, bytes):
        summary = summary.decode("utf-8", "ignore")
    summary = tools.decode_htmlentities(summary)
    return summary.replace("\n", " ")
def Description(self):
    """Return the summary text held in the page's ``overview-top`` element.

    The summary is read from the second ``<p>`` of that element, and only
    when the element contains exactly two ``<p>`` tags.

    Returns:
        The cleaned summary with newlines replaced by spaces, or ``""`` when
        the element is missing or does not contain exactly two ``<p>`` tags.
    """
    page = self.imdbpage.find(id="overview-top")
    if page is None:
        # No overview element on the page: nothing to extract.
        return ""

    paragraphs = page.findAll("p")
    if len(paragraphs) != 2:
        # This case used to reach `return summary` with the name unbound
        # and raise UnboundLocalError.
        return ""

    summary = str(paragraphs[1])
    # NOTE(review): the pattern is greedy, so with several links everything
    # between the first "<a" and the last "/a>" is dropped — confirm intent.
    summary = re.sub(r"\<a.*\/a\>", "", summary)
    summary = tools.remove_html_tags(summary)
    summary = summary.replace("»", "")
    # Decode only byte strings, so text that is already unicode is passed
    # through instead of failing on a missing/implicit decode.
    if isinstance(summary, bytes):
        summary = summary.decode("utf-8", "ignore")
    summary = tools.decode_htmlentities(summary)
    return summary.replace("\n", " ")
def ProductionYear(self):
    """Return the year found in the parenthesised part of the page title.

    The ``<title>`` element is stripped of markup and of the `` - IMDb``
    suffix; the year is the first match of ``[1-2][0-9]{3}`` inside the span
    running from the first ``(`` to the last ``)``.

    Returns:
        The matched four-digit string.

    Raises:
        AttributeError: if the title has no parenthesised part, or that part
            contains no match (``re.search`` returns ``None``).
    """
    title_markup = str(self.imdbpage.find("title"))
    movietitle = tools.decode_htmlentities(
        tools.remove_html_tags(title_markup).replace(" - IMDb", "")
    )
    # Raw strings: "\(" in a normal literal is an invalid escape sequence.
    parenthesised = re.search(r"\(.*\)", movietitle).group(0).strip()
    return re.search(r"[1-2][0-9]{3}", parenthesised).group(0).strip()
def LocalTitle(self):
    """Return the page title without its parenthesised part.

    The ``<title>`` element is stripped of markup and of the `` - IMDb``
    suffix; everything from the first ``(`` to the last ``)`` is then removed
    and surrounding whitespace trimmed.

    Returns:
        The remaining title text.
    """
    title_markup = str(self.imdbpage.find("title"))
    movietitle = tools.decode_htmlentities(
        tools.remove_html_tags(title_markup).replace(" - IMDb", "")
    )
    # Raw string: "\(" in a normal literal is an invalid escape sequence.
    return re.sub(r"\(.*\)", "", movietitle).strip()
def IMDBrating(self):
    """Return the rating taken from the ``star-bar-user-rate`` element.

    The element's ``.b`` child is converted to a string and passed through
    ``tools.remove_html_tags``.

    Raises:
        AttributeError: if the page has no ``star-bar-user-rate`` element.
    """
    rating_box = self.imdbpage.find(id="star-bar-user-rate")
    bold_markup = str(rating_box.b)
    return tools.remove_html_tags(bold_markup)
def extract_clipping_content(raw):
    """Return the text between ``<en-note>`` and ``</en-note>`` in *raw*.

    The captured content is passed through ``tools.remove_html_tags``.

    Returns:
        The processed content, or ``None`` when the tags are not found or
        enclose nothing.
    """
    # NOTE(review): "." does not match newlines here, so a note whose body
    # spans several lines yields None — confirm this is intended.
    found = re.search(r'<en-note>(.*)<\/en-note>', raw)
    if found is None:
        return None

    inner = found.group(1)
    if not inner:
        return None

    return tools.remove_html_tags(inner)
def ProductionYear(self):
    """Return the year found in the parenthesised part of the page title.

    The ``<title>`` element is stripped of markup and of the `` - IMDb``
    suffix; the year is the first match of ``[1-2][0-9]{3}`` inside the span
    running from the first ``(`` to the last ``)``.

    Returns:
        The matched four-digit string.

    Raises:
        AttributeError: if the title has no parenthesised part, or that part
            contains no match (``re.search`` returns ``None``).
    """
    title_markup = str(self.imdbpage.find("title"))
    movietitle = tools.decode_htmlentities(
        tools.remove_html_tags(title_markup).replace(" - IMDb", "")
    )
    # Raw strings: "\(" in a normal literal is an invalid escape sequence.
    parenthesised = re.search(r"\(.*\)", movietitle).group(0).strip()
    return re.search(r"[1-2][0-9]{3}", parenthesised).group(0).strip()
def LocalTitle(self):
    """Return the page title without its parenthesised part.

    The ``<title>`` element is stripped of markup and of the `` - IMDb``
    suffix; everything from the first ``(`` to the last ``)`` is then removed
    and surrounding whitespace trimmed.

    Returns:
        The remaining title text.
    """
    title_markup = str(self.imdbpage.find("title"))
    movietitle = tools.decode_htmlentities(
        tools.remove_html_tags(title_markup).replace(" - IMDb", "")
    )
    # Raw string: "\(" in a normal literal is an invalid escape sequence.
    return re.sub(r"\(.*\)", "", movietitle).strip()
def IMDBrating(self):
    """Return the rating taken from the ``star-bar-user-rate`` element.

    The element's ``.b`` child is converted to a string and passed through
    ``tools.remove_html_tags``.

    Raises:
        AttributeError: if the page has no ``star-bar-user-rate`` element.
    """
    rating_box = self.imdbpage.find(id="star-bar-user-rate")
    bold_markup = str(rating_box.b)
    return tools.remove_html_tags(bold_markup)