def search_for_links(self):
    """Finds activities that link to one of this source's root-domain web sites.

    URLs that have a path are deliberately left out of the query: they tend to
    generate false positive results in G+'s search (reason not yet known). The
    query is capped at models.MAX_AUTHOR_URLS URLs.

    G+ search supports OR:
    https://developers.google.com/+/api/latest/activities/search

    Returns: sequence of ActivityStreams activity dicts; an empty list when no
      URL qualifies for the search.
    """
    quoted = []
    for site in self.domain_urls:
        if util.in_webmention_blacklist(util.domain_from_link(site)):
            continue
        # Root-domain URLs only: path must be empty or a bare slash.
        if urlparse.urlparse(site).path not in ('', '/'):
            continue
        quoted.append('"%s"' % util.fragmentless(site))

    quoted = quoted[:models.MAX_AUTHOR_URLS]
    if not quoted:
        return []

    return self.get_activities(
        search_query=' OR '.join(quoted),
        group_id=gr_source.SEARCH,
        etag=self.last_activities_etag,
        fetch_replies=False,
        fetch_likes=False,
        fetch_shares=False,
        count=50)
def test_fragmentless(self):
    """Checks that util.fragmentless strips a #fragment and keeps the rest."""
    cases = (
        # (input URL, expected result)
        ('', ''),
        ('/path', '/path'),
        ('http://foo', 'http://foo'),
        ('http://foo#bar', 'http://foo'),
        ('http://foo/bar?baz#baj', 'http://foo/bar?baz'),
    )
    for url, expected in cases:
        self.assertEqual(expected, util.fragmentless(url))
def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    Domains on the webmention blacklist are excluded from the query. If no URL
    is left to search for, no search is issued at all; an empty query would
    otherwise be sent to the search backend.

    G+ search supports OR:
    https://developers.google.com/+/api/latest/activities/search

    Returns: sequence of ActivityStreams activity dicts; an empty list when
      there is nothing to search for.
    """
    urls = ['"%s"' % util.fragmentless(url)
            for url in self.domain_urls
            if not util.in_webmention_blacklist(util.domain_from_link(url))]
    if not urls:
        # Nothing to search for; don't run a search with an empty query.
        return []

    return self.get_activities(
        search_query=' OR '.join(urls),
        group_id=gr_source.SEARCH,
        etag=self.last_activities_etag,
        fetch_replies=False,
        fetch_likes=False,
        fetch_shares=False,
        count=50)
def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    Twitter search supports OR:
    https://dev.twitter.com/rest/public/search

    ...but it only returns complete(ish) results if we strip scheme from URLs,
    ie search for example.com instead of http://example.com/, and that also
    returns false positives, so we check that the returned tweets actually have
    matching links. https://github.com/snarfed/bridgy/issues/565

    Returns: sequence of ActivityStreams activity dicts; an empty list when
      there are no non-blacklisted URLs to search for.
    """
    urls = set(
        util.fragmentless(url) for url in self.domain_urls
        if not util.in_webmention_blacklist(util.domain_from_link(url)))
    if not urls:
        return []

    # The query uses schemeless URLs; the post-filter below matches against the
    # full (scheme-included) URLs in `urls`.
    query = ' OR '.join('"%s"' % util.schemeless(url, slashes=False)
                        for url in urls)
    candidates = self.get_activities(
        search_query=query,
        group_id=gr_source.SEARCH,
        etag=self.last_activities_etag,
        fetch_replies=False,
        fetch_likes=False,
        fetch_shares=False,
        count=50)

    # filter out retweets and search false positives that don't actually link
    # to us
    results = []
    for candidate in candidates:
        if candidate.get('verb') == 'share':
            continue

        obj = candidate['object']
        # Per-candidate values, computed once rather than once per URL.
        content = obj.get('content', '')
        linked = obj.get('tags', []) + obj.get('attachments', [])

        # Keep the candidate if any of our URLs appears in its text or is a
        # prefix of one of its tag/attachment URLs.
        if any(url in content or
               any(item.get('url', '').startswith(url) for item in linked)
               for url in urls):
            results.append(candidate)

    return results
def search_for_links(self):
    """Finds activities that link to any of this source's web sites.

    Each non-blocklisted domain URL is reduced to its schemeless,
    fragmentless form and searched for both as a link site and in post text.

    Returns: sequence of ActivityStreams activity dicts; an empty list when
      there are no URLs to search for.
    """
    targets = set()
    for link in self.domain_urls:
        if util.in_webmention_blocklist(util.domain_from_link(link)):
            continue
        targets.add(util.schemeless(util.fragmentless(link), slashes=False))

    if not targets:
        return []

    # Search syntax: https://www.reddit.com/wiki/search
    clauses = (f'site:"{target}" OR selftext:"{target}"' for target in targets)
    return self.get_activities(
        search_query=' OR '.join(clauses),
        group_id=gr_source.SEARCH,
        etag=self.last_activities_etag,
        fetch_replies=False,
        fetch_likes=False,
        fetch_shares=False,
        count=50)
def search_for_links(self):
    """Finds activities that link to any of this source's web sites.

    Each non-blocklisted domain URL is reduced to its schemeless,
    fragmentless form, quoted, and OR-ed into a single search query. Replies
    are fetched along with the matching activities; likes and shares are not.

    Returns: sequence of ActivityStreams activity dicts; an empty list when
      there are no URLs to search for.
    """
    targets = set()
    for link in self.domain_urls:
        if util.in_webmention_blocklist(util.domain_from_link(link)):
            continue
        targets.add(util.schemeless(util.fragmentless(link), slashes=False))

    if not targets:
        return []

    quoted = [f'"{target}"' for target in targets]
    return self.get_activities(
        search_query=' OR '.join(quoted),
        group_id=gr_source.SEARCH,
        etag=self.last_activities_etag,
        fetch_replies=True,
        fetch_likes=False,
        fetch_shares=False,
        count=50)
def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    Twitter search supports OR:
    https://dev.twitter.com/rest/public/search

    ...but it only returns complete(ish) results if we strip scheme from URLs,
    ie search for example.com instead of http://example.com/, and that also
    returns false positives, so we check that the returned tweets actually have
    matching links. https://github.com/snarfed/bridgy/issues/565

    Returns: sequence of ActivityStreams activity dicts; an empty list when
      there are no non-blacklisted URLs to search for.
    """
    urls = set(util.fragmentless(url) for url in self.domain_urls
               if not util.in_webmention_blacklist(util.domain_from_link(url)))
    if not urls:
        return []

    # Search with schemeless URLs, then verify against the full URLs below.
    query = ' OR '.join('"%s"' % util.schemeless(url, slashes=False)
                        for url in urls)
    candidates = self.get_activities(
        search_query=query,
        group_id=gr_source.SEARCH,
        etag=self.last_activities_etag,
        fetch_replies=False,
        fetch_likes=False,
        fetch_shares=False,
        count=50)

    # filter out retweets and search false positives that don't actually link
    # to us
    results = []
    for candidate in candidates:
        if candidate.get('verb') == 'share':
            continue

        obj = candidate['object']
        # Hoisted out of the per-URL check: these don't depend on the URL.
        content = obj.get('content', '')
        tags_and_atts = obj.get('tags', []) + obj.get('attachments', [])

        for url in urls:
            if (url in content or
                    any(t.get('url', '').startswith(url) for t in tags_and_atts)):
                results.append(candidate)
                break

    return results
def search_for_links(self):
    """Finds activities that link to one of this source's root-domain web sites.

    Only root domain URLs are searched for. URLs with paths are skipped because
    they tend to generate false positive results in G+'s search; the cause is
    not yet understood. No more than models.MAX_AUTHOR_URLS URLs are used.

    G+ search supports OR:
    https://developers.google.com/+/api/latest/activities/search

    Returns: sequence of ActivityStreams activity dicts; an empty list when no
      URL qualifies for the search.
    """
    def is_searchable(link):
        # Skip blacklisted domains and any URL that has a path.
        return (not util.in_webmention_blacklist(util.domain_from_link(link))
                and urlparse.urlparse(link).path in ('', '/'))

    terms = ['"%s"' % util.fragmentless(link)
             for link in self.domain_urls if is_searchable(link)]
    terms = terms[:models.MAX_AUTHOR_URLS]

    if not terms:
        return []

    return self.get_activities(
        search_query=' OR '.join(terms),
        group_id=gr_source.SEARCH,
        etag=self.last_activities_etag,
        fetch_replies=False,
        fetch_likes=False,
        fetch_shares=False,
        count=50)