def test_sanitize_url(self):
    """Verify sanitize_url's handling of trailing dots in hostnames.

    A single trailing dot (legal DNS root notation) should be stripped;
    consecutive dots make the host invalid, so sanitize_url must return
    None for those.
    """
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(utils.sanitize_url('http://dk./'), 'http://dk/')
    self.assertEqual(utils.sanitize_url('http://google.com./'),
                     'http://google.com/')
    self.assertEqual(utils.sanitize_url('http://google.com/'),
                     'http://google.com/')
    self.assertEqual(
        utils.sanitize_url('https://github.com/reddit/reddit/pull/1302'),
        'https://github.com/reddit/reddit/pull/1302')
    # Double dot makes the hostname invalid: expect None.
    self.assertEqual(utils.sanitize_url('http://dk../'), None)
def link_equals(urla, urlb, fast=False):
    """Return True if two URLs appear to point at the same resource.

    Unless ``fast`` is set, each URL is resolved with a HEAD request so
    redirects are followed before comparing; both are then normalized
    through sanitize_url.  Any failure (network error, bad URL) is
    treated as "not equal" rather than propagated.
    """
    try:
        if not fast:
            # Follow redirects so URLs that land on the same final
            # location compare equal.
            urla = urllib2.urlopen(HeadRequest(urla)).geturl()
            urlb = urllib2.urlopen(HeadRequest(urlb)).geturl()
        urla = sanitize_url(urla)
        urlb = sanitize_url(urlb)
        return urla == urlb
    except Exception:
        # Best-effort comparison: errors just mean we cannot prove
        # equality.  A bare except would also swallow KeyboardInterrupt
        # and SystemExit, so catch Exception instead.
        pass
    return False
def GET_bookmarklet(self, what):
    '''Controller for the functionality of the bookmarklets (not the
    distribution page)'''
    # Map the leading portion of `what` onto a supported action.
    # NOTE(review): the loop variable shadows the builtin `type`.
    action = ''
    for type in ['like', 'dislike', 'save']:
        if what.startswith(type):
            action = type
            break
    url = sanitize_url(request.get.u)
    uh = request.get.get('uh', "")
    # Look up all existing submissions of this URL; any lookup failure
    # is treated as "no links found".
    try:
        links = Link._by_url(url)
    except:
        links = []
    Subreddit.load_subreddits(links, return_dict = False)
    user = c.user if c.user_is_loggedin else None
    # Keep only links the (possibly anonymous) viewer is allowed to see.
    links = [l for l in links if l.subreddit_slow.can_view(user)]
    if links and not c.user_is_loggedin:
        # Known URL but no session: show the "log in" bookmarklet image.
        return self.redirect("/static/css_login.png")
    elif links and c.user_is_loggedin:
        if not c.user.valid_hash(uh):
            # Stale user hash: tell the user to update the bookmarklet.
            return self.redirect("/static/css_update.png")
        elif action in ['like', 'dislike']:
            #vote up all of the links
            for link in links:
                Vote.vote(c.user, link, action == 'like', request.ip)
        elif action == 'save':
            # Save only the highest-scoring submission of this URL.
            link = max(links, key = lambda x: x._score)
            link._save(c.user)
        # Confirmation image, e.g. css_liked.png / css_saved.png.
        return self.redirect("/static/css_%sd.png" % action)
    # Unknown URL: point the user at the submit image.
    return self.redirect("/static/css_submit.png")
def GET_search(self, query, num, reverse, after, count, sort, restrict_sr):
    """Search links page."""
    # A dotted query may be a URL; if it sanitizes cleanly, send the
    # user to the submit page for it instead of searching.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme = True)
        if url:
            return self.redirect("/submit" + query_string({'url':url}))
    # restrict_sr scopes results to the current subreddit; otherwise
    # search site-wide via the default subreddit.
    if not restrict_sr:
        site = DefaultSR()
    else:
        site = c.site
    try:
        q = IndextankQuery(query, site, sort)
        num, t, spane = self._search(q, num = num, after = after,
                                     reverse = reverse, count = count)
        res = SearchPage(_('search results'), query, t, num,
                         content=spane,
                         nav_menus = [SearchSortMenu(default=sort)],
                         search_params = dict(sort = sort),
                         simple=False, site=c.site,
                         restrict_sr=restrict_sr).render()
        return res
    except (IndextankException, socket.error), e:
        # Python 2 except syntax; backend failures render a friendly
        # search-failure page.
        return self.search_fail(e)
def run(self, url, sr = None):
    """Validate a link-submission URL against an optional subreddit.

    Returns the sanitized URL, 'self' for allowed self posts, a tuple of
    existing Link objects (with ALREADY_SUB set) when the URL was already
    submitted, or an error result for missing/bad URLs.
    """
    # Resolve the target subreddit: default to the current site unless
    # it is a fake (aggregate) subreddit, else look up the given name.
    if sr is None and not isinstance(c.site, FakeSubreddit):
        sr = c.site
    elif sr:
        try:
            sr = Subreddit._by_name(str(sr))
        except (NotFound, UnicodeEncodeError):
            self.set_error(errors.SUBREDDIT_NOEXIST)
            sr = None
    else:
        sr = None
    if not url:
        return self.error(errors.NO_URL)
    url = utils.sanitize_url(url)
    if not url:
        return self.error(errors.BAD_URL)
    if url == 'self':
        # 'self' marks a self post; only valid when the validator
        # allows them.  Falls through to BAD_URL otherwise.
        if self.allow_self:
            return url
    elif not self.lookup:
        # Duplicate lookup disabled: accept the URL as-is.
        return url
    elif url:
        # Check for an existing submission of the same URL in this sr.
        try:
            l = Link._by_url(url, sr)
            self.error(errors.ALREADY_SUB)
            return utils.tup(l)
        except NotFound:
            return url
    return self.error(errors.BAD_URL)
def GET_search(self, query, num, reverse, after, count, sort, restrict_sr):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme=True)
        if url:
            return self.redirect("/submit" + query_string({'url': url}))
    # Either restrict to the current subreddit or search site-wide.
    if not restrict_sr:
        site = DefaultSR()
    else:
        site = c.site
    try:
        q = IndextankQuery(query, site, sort)
        num, t, spane = self._search(q, num=num, after=after,
                                     reverse=reverse, count=count)
        res = SearchPage(_('search results'), query, t, num,
                        content=spane,
                        nav_menus=[SearchSortMenu(default=sort)],
                        search_params=dict(sort=sort),
                        simple=False, site=c.site,
                        restrict_sr=restrict_sr).render()
        return res
    except (IndextankException, socket.error), e:
        # Backend failure: render the search-failure page (Py2 syntax).
        return self.search_fail(e)
def GET_search(self, query, num, reverse, after, count, sort, restrict_sr):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and "." in query:
        url = sanitize_url(query, require_scheme=True)
        if url:
            return self.redirect("/submit" + query_string({"url": url}))
    if not restrict_sr:
        site = DefaultSR()
    else:
        site = c.site
    try:
        cleanup_message = None
        try:
            q = IndextankQuery(query, site, sort)
            # Rewrite the user-facing "proddit:" operator to the internal
            # "reddit:" operator before querying, then rebuild the query.
            if query:
                query = query.replace("proddit:", "reddit:")
            q = IndextankQuery(query, site, sort)
            num, t, spane = self._search(q, num=num, after=after,
                                         reverse=reverse, count=count)
        except InvalidIndextankQuery:
            # strip the query down to a whitelist
            cleaned = re.sub("[^\w\s]+", "", query)
            cleaned = cleaned.lower()
            # if it was nothing but mess, we have to stop
            if not cleaned.strip():
                num, t, spane = 0, 0, []
                cleanup_message = strings.completely_invalid_search_query
            else:
                q = IndextankQuery(cleaned, site, sort)
                num, t, spane = self._search(q, num=num, after=after,
                                             reverse=reverse, count=count)
                cleanup_message = strings.invalid_search_query % {"clean_query": cleaned}
                cleanup_message += " "
                cleanup_message += strings.search_help % {"search_help": self.search_help_page}
        # Undo the operator rewrite so the rendered page shows what the
        # user typed.
        if query:
            query = query.replace("reddit:", "proddit:")
        res = SearchPage(
            _("search results"), query, t, num,
            content=spane,
            nav_menus=[SearchSortMenu(default=sort)],
            search_params=dict(sort=sort),
            infotext=cleanup_message,
            simple=False, site=c.site,
            restrict_sr=restrict_sr,
        ).render()
        return res
    except (IndextankException, socket.error), e:
        # Backend failure: render the search-failure page (Py2 syntax).
        return self.search_fail(e)
def GET_search(self, query, num, reverse, after, count, sort):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme = True)
        if url:
            return self.redirect("/submit" + query_string({'url':url}))
    q = IndextankQuery(query, c.site, sort)
    num, t, spane = self._search(q, num = num, after = after,
                                 reverse = reverse, count = count)
    # On a real subreddit (not a cname-mapped domain), offer a link to
    # repeat the search across all of reddit.
    if not isinstance(c.site,FakeSubreddit) and not c.cname:
        all_reddits_link = "%s/search%s" % (subreddit.All.path,
                                            query_string({'q': query}))
        d = {'reddit_name':      c.site.name,
             'reddit_link':      "http://%s/"%get_domain(cname = c.cname),
             'all_reddits_link': all_reddits_link}
        infotext = strings.searching_a_reddit % d
    else:
        infotext = None
    res = SearchPage(_('search results'), query, t, num,
                     content=spane,
                     nav_menus = [SearchSortMenu(default=sort)],
                     search_params = dict(sort = sort),
                     infotext = infotext).render()
    return res
def run(self, url, sr = None):
    """Validate a link-submission URL against an optional subreddit.

    Returns the sanitized URL, 'self' for self posts, a tuple of
    existing Link objects (with ALREADY_SUB set) if the URL was already
    submitted, or an error result for missing/bad URLs.
    """
    # Resolve the target subreddit: default to the current site unless
    # it is a fake (aggregate) subreddit, else look up the given name.
    if sr is None and not isinstance(c.site, FakeSubreddit):
        sr = c.site
    elif sr:
        try:
            sr = Subreddit._by_name(sr)
        except NotFound:
            c.errors.add(errors.SUBREDDIT_NOEXIST)
            sr = None
    else:
        sr = None
    if not url:
        return self.error(errors.NO_URL)
    url = utils.sanitize_url(url)
    if url == 'self':
        # 'self' marks a self post; pass it through untouched.
        return url
    elif url:
        # Check for an existing submission of the same URL in this sr.
        try:
            l = Link._by_url(url, sr)
            self.error(errors.ALREADY_SUB)
            return utils.tup(l)
        except NotFound:
            return url
    return self.error(errors.BAD_URL)
def run(self, url, sr=None):
    """Validate a submitted URL, flagging duplicates in the target sr."""
    # Work out which subreddit the submission targets.
    if sr is None and not isinstance(c.site, FakeSubreddit):
        target = c.site
    elif sr:
        try:
            target = Subreddit._by_name(sr)
        except NotFound:
            c.errors.add(errors.SUBREDDIT_NOEXIST)
            target = None
    else:
        target = None

    if not url:
        return self.error(errors.NO_URL)

    cleaned = utils.sanitize_url(url)
    if cleaned == 'self':
        # Self posts pass straight through.
        return cleaned
    if cleaned:
        # A pre-existing submission of the same URL is a duplicate.
        try:
            existing = Link._by_url(cleaned, target)
        except NotFound:
            return cleaned
        self.error(errors.ALREADY_SUB)
        return utils.tup(existing)
    return self.error(errors.BAD_URL)
def GET_search(self, query, num, time, reverse, after, count, langs, sort):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme=True)
        if url:
            return self.redirect("/submit" + query_string({'url': url}))
    # Restrict result languages to the validated comma-separated list,
    # falling back to the viewer's content languages.
    if langs and self.verify_langs_regex.match(langs):
        langs = langs.split(',')
    else:
        langs = c.content_langs
    # Scope the search: friends' posts, a multireddit's members, or a
    # single real subreddit; site-wide otherwise.
    subreddits = None
    authors = None
    if c.site == subreddit.Friends and c.user_is_loggedin and c.user.friends:
        authors = c.user.friends
    elif isinstance(c.site, MultiReddit):
        subreddits = c.site.sr_ids
    elif not isinstance(c.site, FakeSubreddit):
        subreddits = [c.site._id]
    q = LinkSearchQuery(
        q=query, timerange=time, langs=langs,
        subreddits=subreddits, authors=authors,
        sort=SearchSortMenu.operator(sort))
    num, t, spane = self._search(
        q, num=num, after=after, reverse=reverse, count=count)
    # On a real subreddit (not a cname domain), offer a link to repeat
    # the search across all of reddit.
    if not isinstance(c.site, FakeSubreddit) and not c.cname:
        all_reddits_link = "%s/search%s" % (subreddit.All.path,
                                            query_string({ 'q': query }))
        d = {
            'reddit_name': c.site.name,
            'reddit_link': "http://%s/" % get_domain(cname=c.cname),
            'all_reddits_link': all_reddits_link
        }
        infotext = strings.searching_a_reddit % d
    else:
        infotext = None
    res = SearchPage(
        _('search results'), query, t, num,
        content=spane,
        nav_menus=[TimeMenu(default=time), SearchSortMenu(default=sort)],
        search_params=dict(sort=sort, t=time),
        infotext=infotext).render()
    return res
def GET_search(self, query, num, time, reverse, after, count, langs, sort):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme=True)
        if url:
            return self.redirect("/submit" + query_string({'url': url}))
    # Validated comma-separated language list, else viewer defaults.
    if langs and self.verify_langs_regex.match(langs):
        langs = langs.split(',')
    else:
        langs = c.content_langs
    # Scope: friends' authors, multireddit members, or one subreddit.
    subreddits = None
    authors = None
    if c.site == subreddit.Friends and c.user_is_loggedin and c.user.friends:
        authors = c.user.friends
    elif isinstance(c.site, MultiReddit):
        subreddits = c.site.sr_ids
    elif not isinstance(c.site, FakeSubreddit):
        subreddits = [c.site._id]
    q = LinkSearchQuery(q=query, timerange=time, langs=langs,
                        subreddits=subreddits, authors=authors,
                        sort=SearchSortMenu.operator(sort))
    num, t, spane = self._search(q, num=num, after=after,
                                 reverse=reverse, count=count)
    # Offer an "all of reddit" search link on real, non-cname sites.
    if not isinstance(c.site, FakeSubreddit) and not c.cname:
        all_reddits_link = "%s/search%s" % (subreddit.All.path,
                                            query_string({'q': query}))
        d = {
            'reddit_name': c.site.name,
            'reddit_link': "http://%s/" % get_domain(cname=c.cname),
            'all_reddits_link': all_reddits_link
        }
        infotext = strings.searching_a_reddit % d
    else:
        infotext = None
    res = SearchPage(
        _('search results'), query, t, num,
        content=spane,
        nav_menus=[TimeMenu(default=time), SearchSortMenu(default=sort)],
        search_params=dict(sort=sort, t=time),
        infotext=infotext).render()
    return res
def GET_search(self, query, num, reverse, after, count, sort, restrict_sr,
               syntax):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and "." in query:
        url = sanitize_url(query, require_scheme=True)
        if url:
            return self.redirect("/submit" + query_string({"url": url}))
    if not restrict_sr:
        site = DefaultSR()
    else:
        site = c.site
    if not syntax:
        syntax = SearchQuery.default_syntax
    try:
        cleanup_message = None
        try:
            q = SearchQuery(query, site, sort, syntax=syntax)
            results, etime, spane = self._search(q, num=num, after=after,
                                                 reverse=reverse,
                                                 count=count)
        except InvalidQuery:
            # Clean the search of characters that might be causing the
            # InvalidQuery exception. If the cleaned search boils down
            # to an empty string, the search code is expected to bail
            # out early with an empty result set.
            cleaned = re.sub("[^\w\s]+", " ", query)
            cleaned = cleaned.lower().strip()
            q = SearchQuery(cleaned, site, sort)
            results, etime, spane = self._search(q, num=num,
                                                 after=after,
                                                 reverse=reverse,
                                                 count=count)
            if cleaned:
                cleanup_message = strings.invalid_search_query % {"clean_query": cleaned}
                cleanup_message += " "
                cleanup_message += strings.search_help % {"search_help": self.search_help_page}
            else:
                cleanup_message = strings.completely_invalid_search_query
        res = SearchPage(
            _("search results"), query, etime, results.hits,
            content=spane,
            nav_menus=[SearchSortMenu(default=sort)],
            search_params=dict(sort=sort),
            infotext=cleanup_message,
            simple=False, site=c.site,
            restrict_sr=restrict_sr,
            syntax=syntax,
            converted_data=q.converted_data,
            facets=results.subreddit_facets,
            sort=sort,
        ).render()
        return res
    # NOTE(review): SearchException is presumably a tuple of exception
    # classes here, extended with socket.error — confirm its definition.
    except SearchException + (socket.error,) as e:
        return self.search_fail(e)
def find_dups(new,fast=True): """Check to see if anyone else is using the same feed... If they are return the category, if not return none""" rss = get_sr_rss() new = sanitize_url(new) for k in [k for k in rss.keys() if rss[k]]: print rss[k] if link_equals(rss[k],new,fast=fast): return k return None
def GET_search(self, query, num, reverse, after, count, sort, restrict_sr,
               syntax):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme = True)
        if url:
            return self.redirect("/submit" + query_string({'url':url}))
    if not restrict_sr:
        site = DefaultSR()
    else:
        site = c.site
    if not syntax:
        syntax = SearchQuery.default_syntax
    try:
        cleanup_message = None
        try:
            q = SearchQuery(query, site, sort, syntax=syntax)
            num, t, spane = self._search(q, num=num, after=after,
                                         reverse = reverse, count = count)
        except InvalidQuery:
            # strip the query down to a whitelist
            cleaned = re.sub("[^\w\s]+", " ", query)
            cleaned = cleaned.lower()
            # if it was nothing but mess, we have to stop
            if not cleaned.strip():
                num, t, spane = 0, 0, []
                cleanup_message = strings.completely_invalid_search_query
            else:
                q = SearchQuery(cleaned, site, sort)
                num, t, spane = self._search(q, num=num, after=after,
                                             reverse=reverse, count=count)
                cleanup_message = strings.invalid_search_query % {
                                      "clean_query": cleaned }
                cleanup_message += " "
                cleanup_message += strings.search_help % {"search_help":
                                       self.search_help_page }
        res = SearchPage(_('search results'), query, t, num,
                         content=spane,
                         nav_menus=[SearchSortMenu(default=sort)],
                         search_params=dict(sort=sort),
                         infotext=cleanup_message,
                         simple=False, site=c.site,
                         restrict_sr=restrict_sr,
                         syntax=syntax,
                         converted_data=q.converted_data
                         ).render()
        return res
    # NOTE(review): SearchException is presumably a tuple of exception
    # classes, concatenated with socket.error — confirm its definition.
    except SearchException + (socket.error,) as e:
        return self.search_fail(e)
def fetch_feed(rss): if rss: rss=sanitize_url(rss) if rss: try: return feedparser.parse(rss) except: pass print "Invalid feed." return None
def GET_search(self, query, num, reverse, after, count, sort, restrict_sr):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme=True)
        if url:
            return self.redirect("/submit" + query_string({'url': url}))
    if not restrict_sr:
        site = DefaultSR()
    else:
        site = c.site
    try:
        cleanup_message = None
        try:
            q = IndextankQuery(query, site, sort)
            num, t, spane = self._search(q, num=num, after=after,
                                         reverse=reverse, count=count)
        except InvalidIndextankQuery:
            # delete special characters from the query and run again
            special_characters = '+-&|!(){}[]^"~*?:\\'
            translation = dict(
                (ord(char), None) for char in list(special_characters))
            cleaned = query.translate(translation)
            q = IndextankQuery(cleaned, site, sort)
            num, t, spane = self._search(q, num=num, after=after,
                                         reverse=reverse, count=count)
            cleanup_message = _('I couldn\'t understand your query, ' +
                                'so I simplified it and searched for ' +
                                '"%(clean_query)s" instead.') % {
                                    'clean_query': cleaned }
        res = SearchPage(_('search results'), query, t, num,
                         content=spane,
                         nav_menus=[SearchSortMenu(default=sort)],
                         search_params=dict(sort=sort),
                         infotext=cleanup_message,
                         simple=False, site=c.site,
                         restrict_sr=restrict_sr).render()
        return res
    except (IndextankException, socket.error), e:
        # Backend failure: render the search-failure page (Py2 syntax).
        return self.search_fail(e)
def _extract_isolated_urls(md):
    """Extract URLs that exist on their own lines in given markdown.

    This style borrowed from wordpress, which is nice because it's
    tolerant to failures and is easy to understand.
    See https://codex.wordpress.org/Embeds
    """
    # Sanitize each line, then keep only real URLs (dropping failures
    # and the special 'self' marker).
    candidates = (sanitize_url(line, require_scheme=True)
                  for line in md.splitlines())
    return [url for url in candidates if url and url != "self"]
def GET_search(self, query, num, reverse, after, count, sort, restrict_sr):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme = True)
        if url:
            return self.redirect("/submit" + query_string({'url':url}))
    if not restrict_sr:
        site = DefaultSR()
    else:
        site = c.site
    try:
        cleanup_message = None
        try:
            q = IndextankQuery(query, site, sort)
            num, t, spane = self._search(q, num=num, after=after,
                                         reverse = reverse, count = count)
        except InvalidIndextankQuery:
            # strip the query down to a whitelist
            cleaned = re.sub("[^\w\s]+", "", query)
            cleaned = cleaned.lower()
            # if it was nothing but mess, we have to stop
            if not cleaned.strip():
                num, t, spane = 0, 0, []
                cleanup_message = strings.completely_invalid_search_query
            else:
                q = IndextankQuery(cleaned, site, sort)
                num, t, spane = self._search(q, num=num, after=after,
                                             reverse=reverse, count=count)
                cleanup_message = strings.invalid_search_query % {
                                      "clean_query": cleaned }
                cleanup_message += " "
                cleanup_message += strings.search_help % {"search_help":
                                       self.search_help_page }
        res = SearchPage(_('search results'), query, t, num,
                         content=spane,
                         nav_menus = [SearchSortMenu(default=sort)],
                         search_params = dict(sort = sort),
                         infotext=cleanup_message,
                         simple=False, site=c.site,
                         restrict_sr=restrict_sr).render()
        return res
    except (IndextankException, socket.error), e:
        # Backend failure: render the search-failure page (Py2 syntax).
        return self.search_fail(e)
def demangle_url(path):
    """Undo URL mangling applied by the framework stack and sanitize.

    Strips subreddit/search-path prefixes and leading slashes, restores
    a default http:// scheme and the '//' lost after the scheme, then
    runs the result through utils.sanitize_url.
    """
    # there's often some URL mangling done by the stack above us, so
    # let's clean up the URL before looking it up
    path = strip_sr.sub('', path)
    path = strip_s_path.sub('', path)
    path = leading_slash.sub("", path)

    if not has_protocol.match(path):
        path = 'http://%s' % path

    if need_insert_slash.match(path):
        # str.replace replaces the deprecated string.replace() module
        # function; restores the '//' collapsed by the stack.
        path = path.replace('/', '//', 1)

    path = utils.sanitize_url(path)
    return path
def run(self, url, sr):
    """Validate a URL for submission to the named subreddit."""
    subreddit = Subreddit._by_name(sr)
    if not url:
        return self.error(errors.NO_URL)
    cleaned = utils.sanitize_url(url)
    if cleaned == 'self':
        # Self posts pass straight through.
        return cleaned
    if cleaned:
        # An existing submission of the same URL is a duplicate.
        try:
            existing = Link._by_url(cleaned, subreddit)
        except NotFound:
            return cleaned
        self.error(errors.ALREADY_SUB)
        return existing.url
    return self.error(errors.BAD_URL)
def validate_link(url, whitelist=False):
    """Validate a candidate feed link.

    Returns the sanitized URL when it is acceptable and not yet
    submitted; returns False when it is rejected.  NOTE(review): falls
    off the end (returning None) when the URL is empty or fails
    sanitization — callers should treat None as falsy/rejected.
    """
    if url:
        url = sanitize_url(url)
        if url:
            # Optionally require the domain to be pre-approved.
            if whitelist and domain(url) not in DOMAIN_WHITELIST:
                print "Domain %s not in whitelist." % domain(url)
                return False
            # No index entry at all means the URL is definitely new.
            try:
                lbu = LinksByUrl._byID(LinksByUrl._key_from_url(url))
            except tdb_cassandra.NotFound:
                return url
            link_id36s = lbu._values()
            links = Link._byID36(link_id36s, data=True, return_dict=False)
            # Ignore deleted submissions when checking for duplicates.
            links = [l for l in links if not l._deleted]
            if len(links) == 0:
                return url
            print "Link %s exists..." % url
            return False
def GET_search(self, query, num, reverse, after, count, sort, restrict_sr):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme = True)
        if url:
            return self.redirect("/submit" + query_string({'url':url}))
    if not restrict_sr:
        site = DefaultSR()
    else:
        site = c.site
    try:
        cleanup_message = None
        try:
            q = IndextankQuery(query, site, sort)
            num, t, spane = self._search(q, num=num, after=after,
                                         reverse = reverse, count = count)
        except InvalidIndextankQuery:
            # delete special characters from the query and run again
            special_characters = '+-&|!(){}[]^"~*?:\\'
            translation = dict((ord(char), None)
                               for char in list(special_characters))
            cleaned = query.translate(translation)
            q = IndextankQuery(cleaned, site, sort)
            num, t, spane = self._search(q, num=num, after=after,
                                         reverse = reverse, count = count)
            cleanup_message = _('I couldn\'t understand your query, ' +
                                'so I simplified it and searched for ' +
                                '"%(clean_query)s" instead.') % {
                                    'clean_query': cleaned }
        res = SearchPage(_('search results'), query, t, num,
                         content=spane,
                         nav_menus = [SearchSortMenu(default=sort)],
                         search_params = dict(sort = sort),
                         infotext=cleanup_message,
                         simple=False, site=c.site,
                         restrict_sr=restrict_sr).render()
        return res
    except (IndextankException, socket.error), e:
        # Backend failure: render the search-failure page (Py2 syntax).
        return self.search_fail(e)
def GET_search(self, query, num, time, reverse, after, count, langs):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme = True)
        if url:
            return self.redirect("/submit" + query_string({'url':url}))
    # Validated comma-separated language list; None searches all.
    if langs and self.verify_langs_regex.match(langs):
        langs = langs.split(',')
    else:
        langs = None
    num, t, spane = self._search(query, time=time,
                                 num = num, after = after,
                                 reverse = reverse, count = count,
                                 types = [Link])
    res = SearchPage(_('search results'), query, t, num,
                     content=spane,
                     nav_menus = [TimeMenu(default = time)]).render()
    return res
def demangle_url(path):
    """Undo URL mangling applied by the framework stack and sanitize.

    Strips subreddit/search-path prefixes and leading slashes, restores
    the scheme (rejecting disallowed protocols) and the '//' lost after
    it, then runs the result through utils.sanitize_url.  Returns None
    when the URL cannot be repaired.
    """
    # there's often some URL mangling done by the stack above us, so
    # let's clean up the URL before looking it up
    path = strip_sr.sub('', path)
    path = strip_s_path.sub('', path)
    path = leading_slash.sub("", path)

    if has_protocol.match(path):
        # Explicit scheme present: it must be on the allowed list.
        if not allowed_protocol.match(path):
            return None
    else:
        path = '%s://%s' % (g.default_scheme, path)

    if need_insert_slash.match(path):
        # str.replace replaces the deprecated string.replace() module
        # function; restores the '//' collapsed by the stack.
        path = path.replace('/', '//', 1)

    try:
        path = utils.sanitize_url(path)
    except TypeError:
        return None
    return path
def GET_search(self, query, num, reverse, after, count, sort):
    """Search links page."""
    # A dotted query may be a URL; redirect to submit if it sanitizes.
    if query and '.' in query:
        url = sanitize_url(query, require_scheme=True)
        if url:
            return self.redirect("/submit" + query_string({'url': url}))
    q = IndextankQuery(query, c.site, sort)
    num, t, spane = self._search(q, num=num, after=after,
                                 reverse=reverse, count=count)
    # Offer an "all of reddit" search link on real, non-cname sites.
    if not isinstance(c.site, FakeSubreddit) and not c.cname:
        all_reddits_link = "%s/search%s" % (subreddit.All.path,
                                            query_string({'q': query}))
        d = {
            'reddit_name': c.site.name,
            'reddit_link': "http://%s/" % get_domain(cname=c.cname),
            'all_reddits_link': all_reddits_link
        }
        infotext = strings.searching_a_reddit % d
    else:
        infotext = None
    res = SearchPage(_('search results'), query, t, num,
                     content=spane,
                     nav_menus=[SearchSortMenu(default=sort)],
                     search_params=dict(sort=sort),
                     infotext=infotext).render()
    return res
def run(self, url):
    """Return the URL normalized through utils.sanitize_url."""
    sanitized = utils.sanitize_url(url)
    return sanitized