示例#1
0
    def _search_movie(self, title, results):
        """Yield ``(movieID, title, url)`` for hits on the first search page.

        This is a generator, not a list-returning function.

        Args:
            title: Search phrase; encoded with ``HTMLGrabber.encode_string``
                before being substituted into ``filmweb_search``.
            results: Accepted for interface compatibility; currently unused.

        Yields:
            Tuples ``(movieID, title, url)`` where ``movieID`` comes from
            ``get_real_id(url, img_src)``. Hits that lack a title link, an
            image link, or an ``<img>`` tag are skipped instead of raising.
        """
        grabber = HTMLGrabber()
        query = grabber.encode_string(title)

        # NOTE: a per-type search (for type in ['film', 'serial']) was
        # sketched here originally but is disabled.
        # TODO: search more result pages, not only page 1.
        content = grabber.retrieve(filmweb_search % (query, 1))
        soup = BeautifulSoup(content)
        descriptions = soup.findAll('div', {'class': 'hitDescWrapper'})
        images = soup.findAll('div', {'class': 'hitImage'})

        # class="hdr hdr-medium hitTitle" for now; compiled once, outside
        # the loop.
        title_class = re.compile('hdr.*')

        # zip() pairs each description with its image block and stops at the
        # shorter list, so a page with fewer image blocks cannot raise
        # IndexError.
        for description, image in zip(descriptions, images):
            link = description.find('a', {'class': title_class})
            if link is None:
                # No recognisable title link in this hit.
                continue
            hit_title = link.text
            url = link['href']
            # The image is needed as well because the url alone sometimes
            # does not provide the movie ID.
            image_link = image.find('a')
            if image_link is None:
                continue
            img = image_link.find('img')
            if img is None:
                continue
            movie_id = get_real_id(url, img['src'])
            yield movie_id, hit_title, url
示例#2
0
    def _search_person(self, title, results=20):
        """Yield ``(personID, name, url)`` for hits on the first search page.

        Example query URL: http://www.filmweb.pl/search/person?q=Tom+Cruise

        This is a generator, not a list-returning function.

        Args:
            title: Search phrase (the person's name); encoded with
                ``HTMLGrabber.encode_string`` before being substituted into
                ``filmweb_person_search``.
            results: Accepted for interface compatibility; currently unused.

        Yields:
            Tuples ``(personID, name, url)`` where ``personID`` comes from
            ``get_real_id(url, img_src)``. Hits that lack a title link, an
            image link, or an ``<img>`` tag are skipped instead of raising.
        """
        grabber = HTMLGrabber()
        query = grabber.encode_string(title)

        # TODO: search more result pages, not only page 1.
        content = grabber.retrieve(filmweb_person_search % (query, 1))
        soup = BeautifulSoup(content)
        descriptions = soup.findAll('div', {'class': 'hitDescWrapper'})
        images = soup.findAll('div', {'class': 'hitImage'})

        # zip() pairs each description with its image block and stops at the
        # shorter list, so a page with fewer image blocks cannot raise
        # IndexError.
        for description, image in zip(descriptions, images):
            link = description.find('a', {'class': 'hdr hdr-medium hitTitle'})
            if link is None:
                # No recognisable title link in this hit.
                continue
            name = link.text
            url = link['href']
            # The image is needed as well because the url alone sometimes
            # does not provide the ID.
            image_link = image.find('a')
            if image_link is None:
                continue
            img = image_link.find('img')
            if img is None:
                continue
            person_id = get_real_id(url, img['src'])
            yield person_id, name, url