def create_soup(url, post=None, headers=None):
    """Download ``url`` and return its content parsed as a BeautifulSoup tree.

    Args:
        url: Address handed to ``httptools.downloadpage``.
        post: Optional POST payload forwarded to ``httptools.downloadpage``.
        headers: Optional request headers forwarded to
            ``httptools.downloadpage``.

    Returns:
        A ``BeautifulSoup`` object built with the ``html5lib`` parser from
        the downloaded data.
    """
    logger.info()

    data = httptools.downloadpage(url, post=post, headers=headers).data
    if 'Javascript is required' in data:
        # The response carries the "Javascript is required" notice: run the
        # js2py conversion for this domain and request the page again.
        # NOTE(review): presumably the conversion leaves behind whatever the
        # site needs for the retry to succeed -- confirm in generictools.
        generictools.js2py_conversion(data, url, domain_name=".seriesflix.to")
        # Retry with the caller's headers as well; they used to be dropped
        # here, so the second request differed from the first one.
        data = httptools.downloadpage(url, post=post, headers=headers).data

    return BeautifulSoup(data, "html5lib", from_encoding="utf-8")
def get_source(url, post=None, ctype=None):
    """Download ``url`` and return its text with layout noise stripped.

    Args:
        url: Address handed to ``httptools.downloadpage``.
        post: Optional POST payload forwarded to ``httptools.downloadpage``.
        ctype: Optional value sent to the site as the ``tipo_contenido``
            cookie. When ``None`` no cookie header is added.

    Returns:
        The downloaded data after removing everything matched by the
        clean-up pattern (line breaks, tabs, ``<br>`` tags, whitespace runs).
    """
    logger.info()

    request_args = {"post": post}
    if ctype is not None:
        # The cookie header used to be built and then never passed to any
        # request, so ``ctype`` had no effect. Send it on every request.
        request_args["headers"] = {"Cookie": "tipo_contenido=%s" % ctype}

    data = httptools.downloadpage(url, **request_args).data
    if 'Javascript is required' in data:
        # The response carries the "Javascript is required" notice: run the
        # js2py conversion for this domain and request the page again.
        generictools.js2py_conversion(data, url, domain_name=".cliver.to")
        data = httptools.downloadpage(url, **request_args).data

    # Clean-up pattern kept exactly as it was.
    return re.sub(r'\n|\r|\t| |<br>|\s{2,}', "", data)
def create_soup(url, referer=None, post=None, unescape=False):
    """Download ``url`` and return its content parsed as a BeautifulSoup tree.

    Args:
        url: Address handed to ``httptools.downloadpage``.
        referer: Optional value sent as the ``Referer`` request header.
        post: Optional POST payload. When given, the request is issued with
            ``post`` and without the ``canonical`` argument.
        unescape: When true, the downloaded data goes through
            ``scrapertools.unescape`` before parsing.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` parser.
    """
    logger.info()

    # Issue a single request. Previously a Referer download was performed and
    # then always overwritten by a second download that lacked the Referer,
    # costing an extra round trip and ignoring ``referer`` altogether.
    request_args = {}
    if referer:
        request_args['headers'] = {'Referer': referer}
    if post:
        # POST requests were sent without ``canonical``; kept that way.
        request_args['post'] = post
    else:
        request_args['canonical'] = canonical

    data = httptools.downloadpage(url, **request_args).data
    # NOTE(review): the conversion is applied to every response regardless of
    # request type -- confirm this matches the intended flow.
    data = generictools.js2py_conversion(data, url, domain_name='', canonical=canonical)
    if unescape:
        data = scrapertools.unescape(data)

    return BeautifulSoup(data, "html.parser", from_encoding="utf-8")
def get_source(url, soup=False, json=False, unescape=False, **opt):
    """Download ``url`` and return the response in the requested form.

    Args:
        url: Address handed to ``httptools.downloadpage``.
        soup: When true (and ``json`` is false), return the data parsed as a
            ``BeautifulSoup`` tree using the ``html5lib`` parser.
        json: When true, return the response's ``json`` attribute; ``soup``
            and ``unescape`` are then ignored.
        unescape: When true (and ``json`` is false), pass the data through
            ``scrapertools.unescape`` first.
        **opt: Extra keyword arguments forwarded to
            ``httptools.downloadpage``; ``canonical`` is always overwritten
            with the module-level value.

    Returns:
        ``response.json``, the response data, or a ``BeautifulSoup`` object,
        depending on the flags.
    """
    logger.info()

    opt['canonical'] = canonical
    response = httptools.downloadpage(url, **opt)

    if 'Javascript is required' in response.data:
        # Imported lazily, only when the "Javascript is required" notice shows up.
        from lib import generictools
        response = generictools.js2py_conversion(
            response,
            url,
            domain_name='.%s' % domain,
            channel=canonical['channel'],
            headers=opt.get('headers', {})
        )

    if json:
        return response.json

    content = response.data
    if unescape:
        content = scrapertools.unescape(content)
    if soup:
        content = BeautifulSoup(content, "html5lib", from_encoding="utf-8")
    return content