def test_urls(self):
    """Smoke-check the page parsers against a handful of known video-site URLs.

    Prints whatever each parser's ``info()`` returns so results can be
    eyeballed; makes no assertions.
    """
    import parsers
    sample_urls = [
        'http://tv.sohu.com/20120726/n349115692.shtml',
        # 'http://v.youku.com/v_show/id_XNzM3MTQwMDY4.html?f=22506977&ev=1',
        'http://www.56.com/u52/v_MTE4NjA0MDY1.html',
        'http://www.tudou.com/programs/view/ZAoQTPEqjAo/',
        # 'http://www.iqiyi.com/v_19rrhkqzgo.html'
    ]
    for page_url in sample_urls:
        # Parenthesized form prints identically under the Python 2
        # print statement for a single expression.
        print(parsers.getVidPageParser(page_url).info(page_url))
def parse_url(self, url, vidfmt, npf, outpath):
    """Resolve a video-page URL into downloadable segment URLs and set up
    output paths on ``self``.

    Parameters:
        url     -- the video page URL to parse.
        vidfmt  -- requested video format, forwarded to the parser.
        npf     -- default number of segments per file; overridden when the
                   parser reports its own value.
        outpath -- NOTE(review): currently unused; the body reads
                   ``self.outpath`` instead — confirm which is intended.

    Returns (urls, npf, headers): the non-empty segment URLs, the effective
    segments-per-file count, and HTTP headers supplied by the parser.

    Side effects: sets ``self.ext``, ``self.outname``, ``self.tmpdir`` and
    ``self.__task_history``.
    """
    # BUG FIX: was getVidPageParser(self.url); every other statement here,
    # including parser.info(url, ...) on the next line, uses the `url`
    # argument, so the parser must be chosen from the same URL.
    parser = parsers.getVidPageParser(url)
    urls, title, self.ext, nperfile, headers = parser.info(url, vidfmt=vidfmt)
    # Drop blank/whitespace-only entries from the parser's URL list.
    urls = [u for u in urls if u.strip() != '']
    if not self.ext:
        self.ext = guess_ext(urls, title)
    # A parser-reported per-file segment count takes precedence over the
    # caller-supplied default.
    if nperfile:
        npf = nperfile
    title = to_native_string(title)
    # NOTE(review): outname embeds the raw title while tmpdir escapes it —
    # presumably intentional, but confirm titles containing path separators
    # are safe here.
    self.outname = pjoin(self.outpath, '%s.%s' % (title, self.ext))
    self.tmpdir = pjoin(self.outpath, escape_file_path(title) + '.downloading')
    self.__task_history = pjoin(self.tmpdir, 'url.txt')
    return urls, npf, headers