Пример #1
0
def searchByTitle(title):
    """Look up IMDb title pages for ``title`` via Google's AJAX web search.

    Args:
        title: Free-text movie title; it is appended to a
            "site:imdb.com/title" query and URL-quoted.

    Returns:
        A list of dicts with the keys "name", "desc" and "id" (formatted as
        "IMDB=tt<digits>"). A result is kept only when its URL ends in an
        IMDb title id and its unformatted title does not contain "TV "
        followed by a letter.
    """
    print("IMDB SEARCH HAPPENED")
    #uses google instead of IMDB search because it's better and easier to parse
    url = ('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=' +
           urllib2.quote("site:imdb.com/title " + title))
    request = urllib2.Request(url, None, {'Referer': 'http://irc.00id.net'})
    response = urllib2.urlopen(request)
    try:
        results_json = json.load(response)
    finally:
        # Release the connection even when the payload is not valid JSON.
        response.close()

    results = results_json['responseData']['results']

    # Patterns are loop-invariant, so build them once.
    tag_re = re.compile(r'<.*?>')
    tv_re = re.compile(r'TV [a-zA-Z]')
    # IMDb ids used to be exactly 7 digits; newer titles have 8, so accept
    # 7 or more instead of silently discarding those results.
    title_url_re = re.compile(r'imdb\.com/title/(tt\d{7,})/$')

    movieResults = []
    for result in results:
        if tv_re.search(result['titleNoFormatting']):
            continue
        url_match = title_url_re.search(result['url'])
        if not url_match:
            continue

        # Use fresh local names so the ``title`` argument is not overwritten.
        name = tools.decode_htmlentities(
            tag_re.sub('', result['titleNoFormatting'].replace(" - IMDb", "")))
        content = tools.decode_htmlentities(tag_re.sub('', result['content']))
        movieResults.append({
            "name": name,
            "desc": content,
            "id": "IMDB=" + url_match.group(1)
        })

    return movieResults
Пример #2
0
    def Persons(self):
        """Collect the directors and cast listed on the parsed page.

        Returns:
            A list of dicts with the keys "Name", "Type" ("Director" or
            "Actor") and "Role". "Role" is "" for directors and for cast
            rows that have no "character" cell.
        """
        def clean_cell(cell):
            # Strip markup, decode entities, then collapse every whitespace
            # run (newlines included) into a single space.
            text = tools.remove_html_tags(str(cell)).strip()
            return re.sub(r'\s+', ' ', tools.decode_htmlentities(text))

        personlist = []

        # Look the label up once instead of once to test and once to use.
        directors_label = self.imdbpage.find(text=re.compile("Directors?:"))
        if directors_label:
            for link in directors_label.parent.parent.findAll("a"):
                # Anchors whose text contains "more credit" are not people.
                if 'more credit' not in link.text:
                    personlist.append({
                        "Name": link.text,
                        "Type": "Director",
                        "Role": ""
                    })

        cast_table = self.imdbpage.find("table", "cast_list")
        if cast_table:
            for row in cast_table.findAll('tr'):
                name_cell = row.find('td', 'name')
                if name_cell is None:
                    # Rows without a name cell carry no person. Testing the
                    # cell itself (rather than comparing the cleaned text to
                    # the string "None") keeps a person literally named
                    # "None" from being dropped.
                    continue
                role_cell = row.find('td', 'character')
                if role_cell is None:
                    role = ""
                else:
                    role = clean_cell(role_cell)
                personlist.append({
                    "Name": clean_cell(name_cell),
                    "Type": "Actor",
                    "Role": role
                })

        return personlist
Пример #3
0
    def Persons(self):
        """Return the directors and cast members found on the parsed page.

        Returns:
            A list of dicts, each with "Name", "Type" ("Director" or "Actor")
            and "Role". Directors always get an empty "Role"; so do cast rows
            that lack a "character" cell.
        """

        def cell_text(cell):
            # Remove tags and entities, then squeeze all whitespace runs
            # (including newlines) down to one space.
            stripped = tools.remove_html_tags(str(cell)).strip()
            return re.sub(r"\s+", " ", tools.decode_htmlentities(stripped))

        personlist = []

        # Each section is located once; the result drives both the test and the walk.
        label = self.imdbpage.find(text=re.compile("Directors?:"))
        if label:
            for anchor in label.parent.parent.findAll("a"):
                # Skip anchors whose text contains "more credit".
                if "more credit" not in anchor.text:
                    personlist.append({"Name": anchor.text, "Type": "Director", "Role": ""})

        cast = self.imdbpage.find("table", "cast_list")
        if cast:
            for row in cast.findAll("tr"):
                name_cell = row.find("td", "name")
                if name_cell is None:
                    # No name cell means no person on this row. Checking for
                    # None directly avoids mistaking the literal text "None"
                    # for a missing cell.
                    continue
                role_cell = row.find("td", "character")
                role = cell_text(role_cell) if role_cell is not None else ""
                personlist.append({"Name": cell_text(name_cell), "Type": "Actor", "Role": role})

        return personlist
Пример #4
0
def searchByTitle(title):
    """Search for IMDb title pages matching ``title`` through Google's AJAX API.

    Args:
        title: Free-text movie title, combined with "site:imdb.com/title"
            and URL-quoted to form the query.

    Returns:
        A list of ``{"name", "desc", "id"}`` dicts, where "id" is
        "IMDB=tt<digits>". Results whose unformatted title contains "TV "
        followed by a letter, or whose URL does not end in an IMDb title id,
        are skipped.
    """
    print("IMDB SEARCH HAPPENED")
    # uses google instead of IMDB search because it's better and easier to parse
    url = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=" + urllib2.quote("site:imdb.com/title " + title)
    request = urllib2.Request(url, None, {"Referer": "http://irc.00id.net"})
    response = urllib2.urlopen(request)
    try:
        results_json = json.load(response)
    finally:
        # Always close the HTTP response, including on a JSON parse failure.
        response.close()

    results = results_json["responseData"]["results"]

    strip_tags = re.compile(r"<.*?>")
    looks_like_tv = re.compile(r"TV [a-zA-Z]")
    # Accept 7 or more digits: IMDb ids grew from 7 to 8 digits, and a strict
    # 7-digit pattern would silently drop the newer titles.
    imdb_title_url = re.compile(r"imdb\.com/title/(tt\d{7,})/$")

    movieResults = []
    for result in results:
        if looks_like_tv.search(result["titleNoFormatting"]):
            continue
        id_match = imdb_title_url.search(result["url"])
        if id_match is None:
            continue

        # Separate local names keep the ``title`` argument intact.
        name = tools.decode_htmlentities(strip_tags.sub("", result["titleNoFormatting"].replace(" - IMDb", "")))
        content = tools.decode_htmlentities(strip_tags.sub("", result["content"]))
        movieResults.append({"name": name, "desc": content, "id": "IMDB=" + id_match.group(1)})

    return movieResults
Пример #5
0
    def Description(self):
        """Return the summary paragraph from the page's "overview-top" element.

        Returns:
            The second <p> of the overview as text, after anchor removal,
            ``tools.remove_html_tags`` and ``tools.decode_htmlentities``, with
            newlines replaced by spaces. Returns None when the overview
            element is missing or does not hold exactly two <p> elements.
        """
        page = self.imdbpage.find(id="overview-top")
        if page is None:
            # No overview section: report "no description" the same way as an
            # overview without a summary paragraph, instead of raising
            # AttributeError on the findAll call below.
            return None

        paragraphs = page.findAll("p")
        if len(paragraphs) != 2:
            return None

        summary = str(paragraphs[1])

        # Greedy on purpose (unchanged): removes everything from the first
        # "<a" to the last "/a>" on a line.
        removelink = re.compile(r"\<a.*\/a\>")
        summary = removelink.sub("", summary)
        summary = tools.remove_html_tags(summary)
        summary = summary.replace("&raquo;", "")
        summary = tools.decode_htmlentities(summary.decode("utf-8", "ignore"))
        return summary.replace("\n", " ")
Пример #6
0
    def Description(self):
        """Extract the plot summary from the element with id "overview-top".

        Returns:
            The text of the overview's second <p> element with anchors, tags
            and the "&raquo;" entity stripped and newlines turned into
            spaces, or None if the overview element is absent or does not
            contain exactly two <p> elements.
        """
        page = self.imdbpage.find(id="overview-top")
        # A missing overview used to crash with AttributeError; treat it like
        # the existing "no summary paragraph" case and return None.
        paragraphs = page.findAll('p') if page is not None else []
        if len(paragraphs) != 2:
            return None

        summary = str(paragraphs[1])

        # The pattern is greedy: it spans from the first "<a" to the last
        # "/a>" on a line. Kept as in the original.
        removelink = re.compile(r'\<a.*\/a\>')
        summary = removelink.sub('', summary)
        summary = tools.remove_html_tags(summary)
        summary = summary.replace('&raquo;', "")
        summary = tools.decode_htmlentities(
            summary.decode("utf-8", 'ignore'))
        summary = summary.replace("\n", " ")
        return summary
Пример #7
0
 def ProductionYear(self):
     """Return the first four-digit year inside the parenthesised part of the page title.

     Raises:
         AttributeError: if the title has no "(...)" part, or that part
             contains no four-digit number starting with 1 or 2.
     """
     title_markup = str(self.imdbpage.find('title'))
     title_text = tools.remove_html_tags(title_markup).replace(" - IMDb", "")
     decoded_title = tools.decode_htmlentities(title_text)

     # Greedy: spans from the first "(" to the last ")" of the title.
     bracketed = re.search(r"\(.*\)", decoded_title).group(0).strip()
     year_match = re.search(r"[1-2][0-9]{3}", bracketed)
     return year_match.group(0).strip()
Пример #8
0
 def LocalTitle(self):
     """Return the page title without its " - IMDb" suffix and trailing "(...)" groups.

     Only parenthesised groups at the end of the title (such as "(2009)" or
     "(I) (2010)") are removed; parentheses that belong to the title itself
     are preserved.
     """
     movietitle = tools.decode_htmlentities(
         tools.remove_html_tags(str(self.imdbpage.find('title'))).replace(
             " - IMDb", ""))
     # The previous greedy "\(.*\)" ran from the first "(" to the last ")",
     # so "(500) Days of Summer (2009)" collapsed to an empty string. Anchor
     # the match at the end and forbid nested parentheses inside a group.
     movietitle = re.sub(r"(\s*\([^()]*\))+\s*$", "", movietitle).strip()
     return movietitle
Пример #9
0
 def ProductionYear(self):
     """Pull the year out of the "(...)" portion of the page's <title>.

     Raises:
         AttributeError: when the title contains no parenthesised part, or
             no four-digit number starting with 1 or 2 inside it.
     """
     without_suffix = tools.remove_html_tags(str(self.imdbpage.find("title"))).replace(" - IMDb", "")
     page_title = tools.decode_htmlentities(without_suffix)

     # First "(" through last ")" of the title (greedy match).
     in_parens = re.search(r"\(.*\)", page_title).group(0).strip()
     # First run of four digits whose leading digit is 1 or 2.
     year = re.search(r"[1-2][0-9]{3}", in_parens).group(0)
     return year.strip()
Пример #10
0
 def LocalTitle(self):
     """Return the page title stripped of " - IMDb" and of trailing "(...)" qualifiers.

     Parenthesised groups are removed only where they end the title, so a
     title that itself starts with or contains parentheses is kept intact.
     """
     movietitle = tools.decode_htmlentities(
         tools.remove_html_tags(str(self.imdbpage.find("title"))).replace(" - IMDb", "")
     )
     # A greedy "\(.*\)" would swallow everything between the first "(" and
     # the last ")", turning "(500) Days of Summer (2009)" into "". Match
     # only the run of non-nested groups anchored at the end of the string.
     trailing_groups = re.compile(r"(\s*\([^()]*\))+\s*$")
     return trailing_groups.sub("", movietitle).strip()