def get_category_list(url):
    """Fetch the genre filter page at *url* and return its categories.

    Every ``li`` element with class ``vxp_filterPane_row`` that contains an
    anchor carrying a ``data-tag`` attribute yields one entry.

    Returns a list of dicts with two keys:
      'link'  -- site-relative AJAX browse URL with the tag embedded in an
                 already URL-encoded tagQuery parameter
      'title' -- the raw ``data-tag`` value
    """
    link_prefix = '/browse/tv-shows/genres?rt=ajax&tagquery=%3CtagQuery%3E%3Ctags%3E%3Ctag+namespace%3D%22tvgenre%22%3E'
    link_suffix = '%3C%2Ftag%3E%3Ctag+namespace%3D%22videotype%22%3Etv%3C%2Ftag%3E%3C%2Ftags%3E%3Csource%3EMsn%3C%2Fsource%3E%3CdataCatalog%3EVideo%3C%2FdataCatalog%3E%3C%2FtagQuery%3E&id=ux1_4'

    # Read the whole response up front so the connection is always closed,
    # even if the download fails part-way through.
    page = urllib.urlopen(url)
    try:
        markup = page.read()
    finally:
        page.close()

    soup = BeautifulStoneSoup(markup, convertEntities="html")
    categories = []
    for item in soup.findAll('li', {"class": "vxp_filterPane_row"}):
        # Debug trace of each filter row (single-argument form prints the
        # same text under the Python 2 print statement).
        print("item " + str(item))
        anchor = item.find('a')
        if anchor is None:
            continue
        tag = anchor.get('data-tag')
        if tag is None:
            # An anchor without a data-tag cannot form a tag query; skip it
            # rather than abort the whole listing with a KeyError.
            continue
        title = str(tag)
        categories.append({'link': link_prefix + title + link_suffix,
                           'title': title})
    return categories
def get_category_list(url):
    """Fetch the genre filter page at *url* and return its categories.

    Every ``li`` element with class ``vxp_filterPane_row`` that contains an
    anchor carrying a ``data-tag`` attribute yields one entry.

    Returns a list of dicts with two keys:
      'link'  -- site-relative AJAX browse URL with the tag embedded in an
                 already URL-encoded tagQuery parameter
      'title' -- the raw ``data-tag`` value
    """
    link_prefix = '/browse/tv-shows/genres?rt=ajax&tagquery=%3CtagQuery%3E%3Ctags%3E%3Ctag+namespace%3D%22tvgenre%22%3E'
    link_suffix = '%3C%2Ftag%3E%3Ctag+namespace%3D%22videotype%22%3Etv%3C%2Ftag%3E%3C%2Ftags%3E%3Csource%3EMsn%3C%2Fsource%3E%3CdataCatalog%3EVideo%3C%2FdataCatalog%3E%3C%2FtagQuery%3E&id=ux1_4'

    # Read the whole response up front so the connection is always closed,
    # even if the download fails part-way through.
    page = urllib.urlopen(url)
    try:
        markup = page.read()
    finally:
        page.close()

    soup = BeautifulStoneSoup(markup, convertEntities="html")
    categories = []
    for item in soup.findAll('li', {"class": "vxp_filterPane_row"}):
        # Debug trace of each filter row (single-argument form prints the
        # same text under the Python 2 print statement).
        print("item " + str(item))
        anchor = item.find('a')
        if anchor is None:
            continue
        tag = anchor.get('data-tag')
        if tag is None:
            # An anchor without a data-tag cannot form a tag query; skip it
            # rather than abort the whole listing with a KeyError.
            continue
        title = str(tag)
        categories.append({'link': link_prefix + title + link_suffix,
                           'title': title})
    return categories
def get_show_list(url):
    """Fetch the show listing page at *url* and return its shows.

    Collects every anchor found inside ``div`` elements with class
    ``vxp_column_container``, in document order.

    Returns a list of dicts with two keys:
      'link'  -- the anchor's ``href`` value
      'title' -- the anchor's first child rendered as a string, with
                 spaces removed
    """
    # Read the whole response up front so the connection is always closed,
    # even if the download fails part-way through.
    page = urllib.urlopen(url)
    try:
        markup = page.read()
    finally:
        page.close()

    soup = BeautifulStoneSoup(markup, convertEntities="html")
    shows = []
    for column in soup.findAll('div', {"class": "vxp_column_container"}):
        for show_link in column.findAll('a'):
            # Debug trace of each anchor (single-argument form prints the
            # same text under the Python 2 print statement).
            print(show_link)
            href = show_link.get('href')
            if href is None or not show_link.contents:
                # Anchors with no target or no content cannot be listed;
                # skip them instead of raising KeyError / IndexError.
                continue
            # NOTE(review): this removes every space from the title; it may
            # have been meant to strip only layout whitespace -- confirm.
            title = str(show_link.contents[0]).replace(' ', '')
            shows.append({'link': str(href), 'title': title})
    return shows
def get_show_list(url):
    """Fetch the show listing page at *url* and return its shows.

    Collects every anchor found inside ``div`` elements with class
    ``vxp_column_container``, in document order.

    Returns a list of dicts with two keys:
      'link'  -- the anchor's ``href`` value
      'title' -- the anchor's first child rendered as a string, with
                 spaces removed
    """
    # Read the whole response up front so the connection is always closed,
    # even if the download fails part-way through.
    page = urllib.urlopen(url)
    try:
        markup = page.read()
    finally:
        page.close()

    soup = BeautifulStoneSoup(markup, convertEntities="html")
    shows = []
    for column in soup.findAll('div', {"class": "vxp_column_container"}):
        for show_link in column.findAll('a'):
            # Debug trace of each anchor (single-argument form prints the
            # same text under the Python 2 print statement).
            print(show_link)
            href = show_link.get('href')
            if href is None or not show_link.contents:
                # Anchors with no target or no content cannot be listed;
                # skip them instead of raising KeyError / IndexError.
                continue
            # NOTE(review): this removes every space from the title; it may
            # have been meant to strip only layout whitespace -- confirm.
            title = str(show_link.contents[0]).replace(' ', '')
            shows.append({'link': str(href), 'title': title})
    return shows