def top_shows(self):
    """Build the shows hash from the links in the ``id="secondary"`` div.

    The page returned by ``tv_shows_page()`` is narrowed to the
    ``<div id="secondary">`` element; the contents of its ``<a>`` tags and
    their ``href`` values are then handed to ``generate_shows_hash``.

    Returns:
        Whatever ``generate_shows_hash`` returns for those names and URLs.
    """
    secondary_html = common.parseDOM(
        self.tv_shows_page(), "div", attrs={"id": "secondary"}
    )
    link_texts = common.parseDOM(secondary_html, "a")
    link_targets = common.parseDOM(secondary_html, "a", ret="href")
    return self.generate_shows_hash(link_texts, link_targets)
def episode_streams(self, path):
    """Map stream source names to resolved stream URLs for one episode.

    Args:
        path: Episode path, passed unchanged to ``tv_show_episode_path``.

    Returns:
        A dict of ``{source name: resolved url}``. Pairs whose URL is
        missing, or for which ``urlresolver.resolve`` returns a falsy
        value, are left out.
    """
    page = self.tv_show_episode_path(path)
    names = common.parseDOM(page, "b")

    # Each <b id="ko"> tag carries iframe markup in its ``data-iframe``
    # attribute. That markup is lower-cased, given a closing tag
    # (presumably so parseDOM can match the element) and parsed again to
    # pull out the ``src``. Hacky, but it worked better than reading
    # <iframe src> straight from the page.
    # NOTE(review): ``lower()`` also lower-cases the URL itself -- confirm
    # that no host serves case-sensitive embed paths.
    urls = []
    for iframe in common.parseDOM(page, "b", attrs={"id": "ko"}, ret="data-iframe"):
        sources = common.parseDOM(iframe.lower() + "</iframe>", "iframe", ret="src")
        # Keep exactly one entry per tag so the positional pairing with
        # ``names`` is unchanged; a tag without a ``src`` becomes None
        # instead of raising IndexError and aborting the whole listing.
        src = sources[0] if sources else None
        urls.append(urlresolver.resolve(src) if src else None)

    # A generator with plain tuple unpacking replaces the former
    # ``lambda (source, url): url``, which is a SyntaxError on Python 3.
    return dict((source, url) for source, url in zip(names, urls) if url)
def shows(self):
    """Return the shows of the ``id="primary"`` div grouped by section.

    Section headers come from ``show_sections()``. Every ``<ul>`` inside
    ``<div id="primary">`` of the page returned by ``tv_shows_page()`` is
    turned into a shows hash via ``generate_shows_hash``, and the two
    sequences are paired up positionally.

    Returns:
        A dict mapping each section header to the shows hash of the
        ``<ul>`` at the same position (``zip`` stops at the shorter one).
    """
    headers = self.show_sections()
    primary_html = common.parseDOM(
        self.tv_shows_page(), "div", attrs={"id": "primary"}
    )

    # TODO show the annotations too?
    section_hashes = []
    for list_html in common.parseDOM(primary_html, "ul"):
        link_texts = common.parseDOM(list_html, "a")
        link_targets = common.parseDOM(list_html, "a", ret="href")
        section_hashes.append(self.generate_shows_hash(link_texts, link_targets))

    return dict(zip(headers, section_hashes))
def show_episodes(self, path):
    """Return the episodes of a show grouped by season.

    Args:
        path: Show path, passed unchanged to ``tv_show_page``.

    Returns:
        A dict mapping each season title (content of a
        ``<div class="omsc-toggle-title">``) to the hash that
        ``generate_hash`` builds from the links of the ``<ul class="b">``
        at the same position (``zip`` stops at the shorter sequence).
    """
    # Retrieve the page once and reuse it for both queries; the previous
    # code called ``tv_show_page(path)`` twice for the same path.
    page = self.tv_show_page(path)
    seasons = common.parseDOM(page, "div", attrs={"class": "omsc-toggle-title"})
    episode_lists = common.parseDOM(page, "ul", attrs={"class": "b"})

    seasons_episodes = [
        self.generate_hash(
            common.parseDOM(episodes_in_season, "a"),
            common.parseDOM(episodes_in_season, "a", ret="href"),
            path_extractor=self.extract_episode_path
        )
        for episodes_in_season in episode_lists
    ]
    return dict(zip(seasons, seasons_episodes))
def domMeta(dom, name):
    """Return the ``content`` attribute of the first matching ``<meta>`` tag.

    Args:
        dom: Markup handed to ``parseDOM``.
        name: Value the tag's ``name`` attribute has to match.

    The first element of the ``parseDOM`` result is returned, so the
    indexing fails (IndexError for an empty list) when nothing matches.
    """
    contents = parseDOM(dom, 'meta', attrs={'name': name}, ret='content')
    return contents[0]
# Run ``odt2html`` on the input file; a non-zero exit status aborts the
# whole script.
print("\n=== Processing file %s ===" % inputFile)
ret = call(['odt2html', inputFile])
if ret != 0:
    print('Failed converting "%s"' % inputFile)
    sys.exit(1)

# File name without its directory and without its last four characters
# (presumably the ".odt" extension -- other suffix lengths would be cut
# wrongly). The part before the first "_" is kept as the date.
baseInput = inputFile.split('/')[-1][:-4]
date = baseInput.split('_')[0]

# The HTML is read from the input path with its last four characters
# replaced by ".html". ``with`` closes the file even if the read fails.
htmlFileSrc = '%s.html' % inputFile[:-4]
with open(htmlFileSrc, 'r') as fd:
    buf = fd.read()

# rawHead stays the full parseDOM result, while rawBody is its first match.
rawHead = parseDOM(buf, 'head')
rawBody = parseDOM(buf, 'body')[0]

# Title comes from <title>; tags and short title come from the
# "keywords" and "classification" <meta> entries of the head.
dataTitle = parseDOM(rawHead, 'title')[0]
dataTags = domMeta(rawHead, 'keywords').split(', ')
dataShortTitle = domMeta(rawHead, 'classification')

print('Title: %s' % dataTitle)
print('Short title: %s' % dataShortTitle)
print('Tags: %s' % dataTags)

meta = {
    'longTitle' : dataTitle,
    'shortTitle' : dataShortTitle,
    'tags' : dataTags
}
listitem=li, isFolder=True) url = build_url({'mode': 'folder', 'foldername': 'shortfilms'}) li = xbmcgui.ListItem('Short Films', iconImage='DefaultFolder.png') xbmcplugin.addDirectoryItem(handle=addon_handle, url=url, listitem=li, isFolder=True) xbmcplugin.endOfDirectory(addon_handle) elif mode[0] == 'folder': foldername = 'fiction' link_html = get_url('http://www.cinemargentino.com/category/type/%s' % foldername ) #ret = parseDOM(link_html, "a", attrs = { "class": "title" }, ret = "href") ret = parseDOM(link_html, "div", attrs = { "class": "movie_list_cell" }) for entry in ret: #link_html = get_url('http://www.cinemargentino.com' + link ) print entry thumb_t = parseDOM(entry, "a", attrs = { "class": "subtitle_marker"}) thumbnail = parseDOM(thumb_t, "img", ret = "src") print "===============================================================" print thumbnail print "===============================================================" entry_info = parseDOM(entry, "div", attrs = { "class": "movie_list_cell_info" }) print entry_info link = parseDOM(entry_info, "a", attrs = { "class": "title" }, ret = "href")[0] title = parseDOM(entry_info, "a", attrs = { "class": "title" })[0] author = parseDOM(entry_info, "h3")[0]