def _search_movie(self, title, results, genre_id, search_type, start_year, end_year):
    """Search for movies and lazily yield the hits found on the first result page.

    The query string is built from ``title`` plus the optional filters, the
    page is fetched with ``HTMLGrabber`` and parsed with ``BeautifulSoup``.

    :param title: search phrase; UTF-8 encoded before being sent as ``q``.
    :param results: accepted for interface compatibility; not read here.
    :param genre_id: when truthy, sent as ``genreIds``.
    :param search_type: when truthy, appended to the base search URL as an
        extra path segment.
    :param start_year: when truthy, sent as ``startYear``.
    :param end_year: when truthy, sent as ``endYear``.
    :returns: generator of ``(movieID, title, url)`` tuples, one per hit for
        which both a title link and a poster image were found.
    """
    grabber = HTMLGrabber()

    params = {"q": title.encode("utf-8"), "page": 1}
    if genre_id:
        params['genreIds'] = genre_id
    if start_year:
        params['startYear'] = start_year
    if end_year:
        params['endYear'] = end_year

    search_path = ("/" + search_type) if search_type else ""
    query_url = filmweb_search_blank + search_path + "?" + urlencode(params)

    # TODO: only the first result page is fetched.
    content = grabber.retrieve(query_url)
    soup = BeautifulSoup(content)
    descriptions = soup.findAll('div', {'class': 'hitDescWrapper'})
    images = soup.findAll('div', {'class': 'hitImage'})

    # Descriptions and images are paired by position. zip() stops at the
    # shorter list, so a page with fewer image blocks than description blocks
    # no longer raises IndexError in the middle of the iteration.
    for description, image in zip(descriptions, images):
        link = description.find('a', {'class': 'hdr hdr-medium hitTitle'})
        if link is None:
            continue

        # The hit URL does not always carry the movie ID, so the poster image
        # source is handed to get_real_id as a second source.
        image_link = image.find('a')
        if image_link is None:
            continue
        img = image_link.find("img")
        if img is None:
            continue

        hit_url = link['href']
        movie_id = get_real_id(hit_url, img['src'])
        # Yield only once an ID has been resolved for *this* hit, so a stale
        # or unbound ID can never leak into the results.
        yield movie_id, link.text, hit_url
def _search_filtered_movie(self, title, results, genre_id, search_type):
    """Search for movies with optional filters and lazily yield the hits.

    Only the first result page is requested (``page`` is fixed to 1).

    :param title: optional search phrase; when truthy it is UTF-8 encoded and
        sent as ``q``.
    :param results: accepted for interface compatibility; not read here.
    :param genre_id: when truthy, sent as ``genreIds``.
    :param search_type: when truthy, appended to the base search URL as an
        extra path segment.
    :returns: generator of ``(movieID, title, url)`` tuples where ``title`` is
        the text of the hit's own link.
    """
    grabber = HTMLGrabber()

    params = {'page': 1}
    if title:
        params['q'] = title.encode("utf-8")
    if genre_id:
        params['genreIds'] = genre_id

    search_path = ("/" + search_type) if search_type else ""
    query_url = filmweb_search_blank + search_path + "?" + urllib.urlencode(params)

    content = grabber.retrieve(query_url)
    soup = BeautifulSoup(content)
    hits = soup.findAll('li', {'id': re.compile('hit_([0-9]*)')})

    for hit in hits:
        heading = hit.find("h3")
        link = heading.find("a") if heading is not None else None
        poster = hit.find("div", {'class': 'filmPoster-1'})
        img = poster.find("img") if poster is not None else None
        # Skip hits that lack the expected markup instead of letting one
        # malformed entry abort the whole generator.
        if link is None or img is None:
            continue

        hit_url = link['href']
        movie_id = get_real_id(hit_url, img['src'])
        # Report the hit's own title. The search phrase may be empty or None
        # (e.g. a genre-only search) and is the same for every hit, so it
        # says nothing about the movie that was found.
        yield movie_id, link.text, hit_url
def parse_filmography(self):
    """Return a list of ``Movie`` objects for the filmography in ``self.soup``.

    Every ``<td class="filmTitleCol">`` cell contributes one movie, built from
    the first link in the cell: the link's ``href`` is resolved to an ID with
    ``get_real_id`` and the link text becomes the movie title.
    """
    # Function-level import, kept as in the original
    # (presumably avoids a circular import -- TODO confirm).
    from filmweb.Movie import Movie

    def build_movie(cell):
        # One Movie per title cell, described by the cell's first anchor.
        anchor = cell.find("a")
        href = anchor['href']
        return Movie(objID=get_real_id(href), title=anchor.text, url=href)

    title_cells = self.soup.findAll("td", {'class': "filmTitleCol"})
    return [build_movie(cell) for cell in title_cells]
def parse_filmography(self):
    """Return a list of ``Movie`` objects for the filmography in ``self.soup``.

    Every ``<tr data-type="F">`` row contributes one movie, built from the
    first link in the row: the link's ``href`` is resolved to an ID with
    ``get_real_id`` and the link text becomes the movie title.
    """
    # Function-level import, kept as in the original
    # (presumably avoids a circular import -- TODO confirm).
    from filmweb.Movie import Movie

    rows = self.soup.findAll("tr", {'data-type': "F"})
    # Lazy generator: each row's anchor is looked up right before its Movie
    # is built, the same interleaving as a plain loop.
    anchors = (row.find("a") for row in rows)
    return [
        Movie(objID=get_real_id(anchor['href']), title=anchor.text, url=anchor['href'])
        for anchor in anchors
    ]
def _search_movie(self, title, results):
    """Search for movies and lazily yield the hits found on the first result page.

    :param title: search phrase; encoded with ``HTMLGrabber.encode_string``
        before being substituted into the search URL.
    :param results: accepted for interface compatibility; not read here.
    :returns: generator of ``(movieID, title, url)`` tuples, one per hit for
        which both a title link and a poster image were found.
    """
    grabber = HTMLGrabber()
    encoded_title = grabber.encode_string(title)

    # TODO: only the first result page is fetched.
    content = grabber.retrieve(filmweb_search % (encoded_title, 1))
    soup = BeautifulSoup(content)
    descriptions = soup.findAll('div', {'class': 'hitDescWrapper'})
    images = soup.findAll('div', {'class': 'hitImage'})

    # Compiled once instead of on every iteration. The title link currently
    # carries class="hdr hdr-medium hitTitle".
    title_class = re.compile('hdr.*')

    # Descriptions and images are paired by position. zip() stops at the
    # shorter list, so a page with fewer image blocks than description blocks
    # no longer raises IndexError in the middle of the iteration.
    for description, image in zip(descriptions, images):
        link = description.find('a', {'class': title_class})
        if link is None:
            continue

        # The hit URL does not always carry the movie ID, so the poster image
        # source is handed to get_real_id as a second source.
        image_link = image.find('a')
        if image_link is None:
            continue
        img = image_link.find("img")
        if img is None:
            continue

        hit_url = link['href']
        movie_id = get_real_id(hit_url, img['src'])
        # Yield only once an ID has been resolved for *this* hit, so a stale
        # or unbound ID can never leak into the results.
        yield movie_id, link.text, hit_url
def _search_person(self, title, results=20):
    """Search for persons and lazily yield the hits found on the first result page.

    Example search URL: http://www.filmweb.pl/search/person?q=Tom+Cruise

    :param title: search phrase (a person's name); encoded with
        ``HTMLGrabber.encode_string`` before being substituted into the
        person search URL.
    :param results: accepted for interface compatibility; not read here.
    :returns: generator of ``(personID, title, url)`` tuples, one per hit for
        which both a name link and an image were found.
    """
    grabber = HTMLGrabber()
    encoded_title = grabber.encode_string(title)

    # TODO: only the first result page is fetched.
    content = grabber.retrieve(filmweb_person_search % (encoded_title, 1))
    soup = BeautifulSoup(content)
    descriptions = soup.findAll('div', {'class': 'hitDescWrapper'})
    images = soup.findAll('div', {'class': 'hitImage'})

    # Descriptions and images are paired by position. zip() stops at the
    # shorter list, so a page with fewer image blocks than description blocks
    # no longer raises IndexError in the middle of the iteration.
    for description, image in zip(descriptions, images):
        link = description.find('a', {'class': 'hdr hdr-medium hitTitle'})
        if link is None:
            continue

        # The hit URL does not always carry the ID, so the image source is
        # handed to get_real_id as a second source.
        image_link = image.find('a')
        if image_link is None:
            continue
        img = image_link.find('img')
        if img is None:
            continue

        hit_url = link['href']
        person_id = get_real_id(hit_url, img['src'])
        # Yield only once an ID has been resolved for *this* hit, so a stale
        # or unbound ID can never leak into the results.
        yield person_id, link.text, hit_url