def _expand(link, timeout=2, **kwargs):
    '''
    Resolve a (possibly shortened) link to its final URL and domain.

    A redirect-following HEAD request is attempted first. The resulting
    domain then selects one of two special cases: domains listed in
    ``constants.url_appenders`` (e.g. ln.is) have the domain text stripped
    out of the URL, while domains listed in
    ``constants.short_domain_ad_redirects`` (e.g. st.sh), or a domain of
    ``-1``, are re-resolved through ``unshortenit``.

    :param link: string of a link to unshorten.
    :param timeout: timeout passed to ``requests.head`` and to the
        ``unshortenit`` fallback.
    :param kwargs: extra keyword arguments forwarded to ``requests.head``.
    :returns: a dictionary with the original link (``original_url``), the
        unshortened domain (``resolved_domain``) and the unshortened link
        (``resolved_url``).
    '''
    try:
        response = requests.head(
            link,
            allow_redirects=True,
            timeout=timeout,
            **kwargs
        )
        response.raise_for_status()
        resolved_url = response.url
        resolved_domain = get_domain(resolved_url)
    except requests.exceptions.RequestException as err:
        # The request failed: derive a (domain, url) pair from the error text.
        resolved_domain, resolved_url = _parse_error(str(err))

    if resolved_domain in constants.url_appenders:
        # Remove the domain text from the URL, then recompute the domain
        # from what is left.
        resolved_url = resolved_url.replace(resolved_domain, '')
        resolved_domain = get_domain(resolved_url)
    elif (resolved_domain in constants.short_domain_ad_redirects
          or resolved_domain == -1):
        # -1 is presumably the "could not parse" marker from _parse_error
        # (TODO confirm); either way, start over from the original link.
        resolved_url = unshortenit.UnshortenIt().unshorten(link, timeout=timeout)
        resolved_domain = get_domain(resolved_url)

    return {
        'original_url': link,
        'resolved_domain': resolved_domain,
        'resolved_url': resolved_url,
    }
def processPage(self, content):
    '''
    Scan a Twitter feed page for release announcements and forward them.

    Every non-promoted tweet whose text paragraph starts with a link that
    carries a ``data-expanded-url`` attribute is unshortened and routed by
    host name: ``www.baka-tsuki.org`` goes to ``self.dispatchBT`` and hosts
    present in ``NANO_DESU_MAP`` go to ``self.dispatchNanoDesu``. Truthy
    results are collected and, if any were found, passed to
    ``self.sendReleases``.

    :param content: not read by this method (see the review note below).
    :returns: None.
    '''
    # NOTE(review): the page is parsed from ``self.content``; the ``content``
    # argument is ignored. Confirm this is intended.
    soup = WebRequest.as_soup(self.content)

    releases = []
    for tweet in soup.find_all('li', attrs={"data-item-type": "tweet"}):
        # ``.get`` avoids a KeyError on an item that has no class attribute.
        if "promoted" in str(tweet.get('class', '')):
            continue

        tweet_text = tweet.find("p", class_='tweet-text')
        if not (tweet_text and tweet_text.a):
            continue

        # The first anchor may lack ``data-expanded-url`` (e.g. hashtag or
        # mention links). Skip such tweets instead of raising KeyError and
        # aborting the whole page.
        itemurl = tweet_text.a.get('data-expanded-url')
        if not itemurl:
            continue

        itemtxt = tweet_text.get_text()
        itemurl = unshortenit.UnshortenIt().unshorten(itemurl)
        urlnl = urllib.parse.urlsplit(itemurl).netloc.lower()

        if urlnl == 'www.baka-tsuki.org':
            msg = self.dispatchBT(itemurl, itemtxt)
            if msg:
                releases.append(msg)

        if urlnl in NANO_DESU_MAP:
            msg = self.dispatchNanoDesu(urlnl, itemurl, itemtxt)
            if msg:
                releases.append(msg)

    self.log.info("Found %s releases from Twitter Feed", len(releases))
    if releases:
        self.sendReleases(releases)
def unwrap_redirect(urlin, resolve_redirects=True):
    '''
    Unshorten ``urlin`` via ``unshortenit``, using a fresh ``CacheObject``
    as the URL cache.

    :param urlin: the URL to unshorten.
    :param resolve_redirects: forwarded to ``unshorten`` as ``resolve_30x``.
    :returns: the value returned by ``unshorten``, or ``None`` when the
        lookup raises ``unshortenit.NotFound``,
        ``unshortenit.UnshortenFailed`` or
        ``requests.exceptions.ConnectionError``.
    '''
    try:
        resolver = unshortenit.UnshortenIt(urlcache=CacheObject())
        return resolver.unshorten(urlin, resolve_30x=resolve_redirects)
    except (
        unshortenit.NotFound,
        unshortenit.UnshortenFailed,
        requests.exceptions.ConnectionError,
    ):
        # Known resolution failures are reported to the caller as None.
        return None