Example #1
    def _get_person_tags(self, obj):
        """Extract person tags that refer to Flickr users.

        Uses https://www.flickr.com/services/api/flickr.urls.lookupUser.html
        to find the NSID for a particular URL.

        Args:
          obj: ActivityStreams object that may contain person targets

        Returns:
          a sequence of ActivityStreams person objects augmented with 'id' equal
          to the Flickr user's NSID
        """
        people = {}  # maps id to tag
        for tag in obj.get('tags', []):
            url = tag.get('url', '')
            if (util.domain_from_link(url) == 'flickr.com'
                    and tag.get('objectType') == 'person'):
                resp = self.call_api_method('flickr.urls.lookupUser',
                                            {'url': url})
                id = resp.get('user', {}).get('id')
                if id:
                    tag = copy.copy(tag)
                    tag['id'] = id
                    people[id] = tag
        return people.values()
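A minimal standalone sketch of the same logic, with the Flickr API call replaced by a stub callable; the sample NSID and profile URL below are hypothetical:

import copy

def augment_person_tags(obj, lookup_user):
    """Simplified version of the method above; lookup_user maps a
    flickr.com profile URL to a flickr.urls.lookupUser response dict."""
    people = {}  # maps NSID to tag
    for tag in obj.get('tags', []):
        url = tag.get('url', '')
        if 'flickr.com' in url and tag.get('objectType') == 'person':
            nsid = lookup_user(url).get('user', {}).get('id')
            if nsid:
                tag = copy.copy(tag)
                tag['id'] = nsid
                people[nsid] = tag
    return list(people.values())

obj = {'tags': [{'objectType': 'person',
                 'url': 'https://www.flickr.com/people/someuser/'}]}
print(augment_person_tags(obj, lambda url: {'user': {'id': '12345678@N00'}}))
# [{'objectType': 'person', 'url': '...', 'id': '12345678@N00'}]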
Example #2
def redirect_unwrap(val):
    """Removes our redirect wrapping from a URL, if it's there.

    val may be a string, dict, or list. dicts and lists are unwrapped
    recursively.

    Strings that aren't wrapped URLs are left unchanged.

    Args:
      val: string, dict, or list

    Returns: same type as val, with redirect wrapping removed
    """
    if isinstance(val, dict):
        return {k: redirect_unwrap(v) for k, v in val.items()}

    elif isinstance(val, list):
        return [redirect_unwrap(v) for v in val]

    elif isinstance(val, str):
        prefix = urllib.parse.urljoin(request.host_url, '/r/')
        if val.startswith(prefix):
            return util.follow_redirects(val[len(prefix):]).url
        elif val.startswith(request.host_url):
            domain = util.domain_from_link(
                urllib.parse.urlparse(val).path.strip('/'))
            return util.follow_redirects(domain).url

    return val
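A rough standalone sketch of the unwrapping recursion, with the Flask request context replaced by a hard-coded host URL and the redirect-following step dropped (both simplifications are assumptions):

import urllib.parse

HOST_URL = 'https://fed.brid.gy/'  # assumed host, for illustration only

def unwrap(val):
    if isinstance(val, dict):
        return {k: unwrap(v) for k, v in val.items()}
    elif isinstance(val, list):
        return [unwrap(v) for v in val]
    elif isinstance(val, str):
        prefix = urllib.parse.urljoin(HOST_URL, '/r/')
        if val.startswith(prefix):
            # the real code also follows redirects on the unwrapped URL
            return val[len(prefix):]
    return val

activity = {'object': {'url': 'https://fed.brid.gy/r/https://example.com/post'}}
print(unwrap(activity))
# {'object': {'url': 'https://example.com/post'}}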
Example #3
  def __init__(self, instance, access_token, user_id=None,
               truncate_text_length=None):
    """Constructor.

    If user_id is not provided, it will be fetched via the API.

    Args:
      instance: string, base URL of Mastodon instance, eg https://mastodon.social/
      access_token: string, OAuth access token
      user_id: string or integer, optional, current user's id (not username!) on
        this instance
      truncate_text_length: int, optional character limit for toots, overrides
        the default of 500
    """
    assert instance
    self.instance = self.BASE_URL = instance
    assert access_token
    self.access_token = access_token
    self.TRUNCATE_TEXT_LENGTH = (
      truncate_text_length if truncate_text_length is not None
      else DEFAULT_TRUNCATE_TEXT_LENGTH)
    self.DOMAIN = util.domain_from_link(instance)

    if user_id:
      self.user_id = user_id
    else:
      creds = self._get(API_VERIFY_CREDENTIALS)
      self.user_id = creds['id']
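Typical construction might look like the following; the class name Mastodon and the token value are assumptions, and passing user_id up front skips the verify-credentials API call:

m = Mastodon('https://mastodon.social/',
             access_token='ACCESS_TOKEN',  # placeholder
             user_id=123,                  # skips the API lookup
             truncate_text_length=500)
assert m.DOMAIN == 'mastodon.social'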
Example #4
    def undo_follow(self, undo_unwrapped):
        """Replies to an AP Follow request with an Accept request.

        Args:
          undo_unwrapped: dict, AP Undo activity with redirect URLs unwrapped
        """
        logging.info('Undoing Follow')

        follow = undo_unwrapped.get('object', {})
        follower = follow.get('actor')
        followee = follow.get('object')
        if not follower or not followee:
            self.error(
                'Undo of Follow requires object with actor and object. Got: %s'
                % follow)

        # deactivate Follower
        user_domain = util.domain_from_link(followee)
        follower_obj = Follower.get_by_id(Follower._id(user_domain, follower))
        if follower_obj:
            logging.info('Marking %s as inactive' % follower_obj.key)
            follower_obj.status = 'inactive'
            follower_obj.put()
        else:
            logging.warning('No Follower found for %s %s', user_domain,
                            follower)
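For reference, the unwrapped Undo activity this method expects has roughly this shape (all URLs hypothetical):

undo_unwrapped = {
    'type': 'Undo',
    'actor': 'https://mastodon.example/users/alice',
    'object': {
        'type': 'Follow',
        'actor': 'https://mastodon.example/users/alice',
        # followee; its domain is used to look up the Follower entity
        'object': 'https://bob.example/',
    },
}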
Example #5
def redir(to):
    """301 redirect to the embedded fully qualified URL.

    e.g. redirects /r/https://foo.com/bar?baz to https://foo.com/bar?baz
    """
    if request.args:
        to += '?' + urllib.parse.urlencode(request.args)
    # some browsers collapse repeated /s in the path down to a single slash.
    # if that happened to this URL, expand it back to two /s.
    to = re.sub(r'^(https?:/)([^/])', r'\1/\2', to)

    if not to.startswith('http://') and not to.startswith('https://'):
        error(f'Expected fully qualified URL; got {to}')

    # check that we've seen this domain before so we're not an open redirect
    domains = set(
        (util.domain_from_link(to), urllib.parse.urlparse(to).hostname))
    for domain in domains:
        if domain and MagicKey.get_by_id(domain):
            logging.info(f'Found MagicKey for domain {domain}')
            break
    else:
        logging.info(f'No user found for any of {domains}; returning 404')
        abort(404)

    # poor man's conneg, only handle single Accept values, not multiple with
    # priorities.
    if request.headers.get('Accept') in (common.CONTENT_TYPE_AS2,
                                         common.CONTENT_TYPE_AS2_LD):
        return convert_to_as2(to)

    # redirect
    logging.info(f'redirecting to {to}')
    return redirect(to, code=301)
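The slash-expanding substitution can be checked in isolation; a quick sketch:

import re

for path in ('https:/foo.com/bar', 'https://foo.com/bar'):
    print(re.sub(r'^(https?:/)([^/])', r'\1/\2', path))
# both iterations print https://foo.com/bar: the collapsed slash is
# restored, and an already well-formed URL passes through unchanged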
Example #6
  def _get_person_tags(self, obj):
    """Extract person tags that refer to Flickr users.

    Uses https://www.flickr.com/services/api/flickr.urls.lookupUser.html
    to find the NSID for a particular URL.

    Args:
      obj: ActivityStreams object that may contain person targets

    Returns:
      a sequence of ActivityStreams person objects augmented with 'id' equal to
      the Flickr user's NSID
    """
    people = {}  # maps id to tag
    for tag in obj.get('tags', []):
      url = tag.get('url', '')
      if (util.domain_from_link(url) == 'flickr.com' and
          tag.get('objectType') == 'person'):
        resp = self.call_api_method('flickr.urls.lookupUser', {'url': url})
        id = resp.get('user', {}).get('id')
        if id:
          tag = copy.copy(tag)
          tag['id'] = id
          people[id] = tag
    return people.values()
Example #7
    def user_to_actor(self, user):
        """Converts a GitHub user to an ActivityStreams actor.

        Handles both v4 GraphQL and v3 REST API user objects.

        https://developer.github.com/v4/object/user/
        https://developer.github.com/v3/users/

        Args:
          user: dict, decoded JSON GitHub user

        Returns:
          an ActivityStreams actor dict, ready to be JSON-encoded
        """
        actor = self._to_object(user)
        if not actor:
            return actor

        username = user.get('login')
        desc = user.get('bio') or user.get('description')

        actor.update({
            # TODO: orgs, bots
            'objectType': 'person',
            'displayName': user.get('name') or username,
            'username': username,
            'email': user.get('email'),
            'description': desc,
            'summary': desc,
            'image': {'url': user.get('avatarUrl') or user.get('avatar_url')
                             or user.get('url')},
            'location': {'displayName': user.get('location')},
        })

        # extract web site links. extract_links uniquifies and preserves order
        urls = sum((util.extract_links(user.get(field)) for field in (
            'html_url',    # REST
            'url',         # both
            'websiteUrl',  # GraphQL
            'blog',        # REST
            'bio',         # both
        )), [])
        urls = [u for u in urls if util.domain_from_link(u) != 'api.github.com']
        if urls:
            actor['url'] = urls[0]
            if len(urls) > 1:
                actor['urls'] = [{'value': u} for u in urls]

        return self.postprocess_object(actor)
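As an illustration, a v3 REST user object along these lines (all values hypothetical) would map to the actor fields sketched in the comment:

user = {
    'login': 'octocat',
    'name': 'The Octocat',
    'bio': 'I build things at https://octocat.example/',
    'avatar_url': 'https://avatars.example/u/583231',
    'html_url': 'https://github.com/octocat',
}
# user_to_actor(user) would yield, among other fields:
# {'objectType': 'person', 'displayName': 'The Octocat',
#  'username': 'octocat', 'url': 'https://github.com/octocat',
#  'urls': [{'value': 'https://github.com/octocat'},
#           {'value': 'https://octocat.example/'}], ...}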
Example #8
def webmention_endpoint_cache_key(url):
  """Returns memcache key for a cached webmention endpoint for a given URL.

  Example: 'W https snarfed.org'
  """
  domain = util.domain_from_link(url)
  scheme = urlparse.urlparse(url).scheme
  return ' '.join(('W', scheme, domain))
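Assuming util.domain_from_link returns the bare registered domain, usage looks like:

print(webmention_endpoint_cache_key('https://snarfed.org/2018/some-post'))
# W https snarfed.org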
Example #9
def webmention_endpoint_cache_key(url):
    """Returns memcache key for a cached webmention endpoint for a given URL.

    Example: 'W https snarfed.org'
    """
    domain = util.domain_from_link(url)
    scheme = urlparse.urlparse(url).scheme
    return ' '.join(('W', scheme, domain))
Example #10
    def accept_follow(self, follow, follow_unwrapped):
        """Replies to an AP Follow request with an Accept request.

        Args:
          follow: dict, AP Follow activity
          follow_unwrapped: dict, same, except with redirect URLs unwrapped
        """
        logging.info('Replying to Follow with Accept')

        followee = follow.get('object')
        followee_unwrapped = follow_unwrapped.get('object')
        follower = follow.get('actor')
        if not followee or not followee_unwrapped or not follower:
            common.error(
                self,
                'Follow activity requires object and actor. Got: %s' % follow)

        inbox = follower.get('inbox')
        follower_id = follower.get('id')
        if not inbox or not follower_id:
            common.error(self, 'Follow actor requires id and inbox. Got: %s'
                         % follower)

        # store Follower
        user_domain = util.domain_from_link(followee_unwrapped)
        Follower.get_or_create(user_domain,
                               follower_id,
                               last_follow=json.dumps(follow))

        # send AP Accept
        accept = {
            '@context': 'https://www.w3.org/ns/activitystreams',
            'id': util.tag_uri(appengine_config.HOST,
                               'accept/%s/%s' % (user_domain, follow.get('id'))),
            'type': 'Accept',
            'actor': followee,
            'object': {
                'type': 'Follow',
                'actor': follower_id,
                'object': followee,
            }
        }
        resp = send(accept, inbox, user_domain)
        self.response.status_int = resp.status_code
        self.response.write(resp.text)

        # send webmention
        common.send_webmentions(self,
                                as2.to_as1(follow),
                                proxy=True,
                                protocol='activitypub',
                                source_as2=json.dumps(follow_unwrapped))
Example #11
    def base_object(self, obj):
        """Returns the 'base' silo object that an object operates on.

        For example, if the object is a comment, this returns the post that it's
        a comment on. If it's an RSVP, this returns the event. The id in the
        returned object is silo-specific, ie not a tag URI.

        Subclasses may override this.

        Args:
          obj: ActivityStreams object

        Returns:
          dict, minimal ActivityStreams object. Usually has at least id; may
          also have url, author, etc.
        """
        # look at in-reply-tos first, then objects (for likes and reposts).
        # technically, the ActivityStreams 'object' field is always supposed to be
        # singular, but microformats2.json_to_object() sometimes returns activities
        # that have a list value, e.g. likes or reposts of multiple objects.
        candidates = []
        for field in ('inReplyTo', 'object'):
            objs = obj.get(field, [])
            if isinstance(objs, dict):
                candidates.append(objs)
            else:
                candidates += objs

        for base_obj in candidates:
            parsed_id = util.parse_tag_uri(base_obj.get('id', ''))
            if parsed_id:
                domain = parsed_id[0]
            else:
                domain = util.domain_from_link(base_obj.get('url', ''))
            if domain == self.DOMAIN:
                break
        else:
            return {}

        base_obj = copy.deepcopy(base_obj)
        id = base_obj.get('id')
        url = base_obj.get('url')

        if id:
            parsed = util.parse_tag_uri(id)
            if parsed:
                base_obj['id'] = parsed[1]
        elif url:
            base_obj['id'] = self.base_id(url)

        return base_obj
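A worked example, assuming a subclass with DOMAIN = 'fa.ke' (a hypothetical silo) and granary's tag URI format:

like = {
    'verb': 'like',
    'object': {'id': 'tag:fa.ke,2013:000_post',
               'url': 'https://fa.ke/000/post'},
}
# base_object(like) matches the tag URI's domain against self.DOMAIN, then
# strips it, returning {'id': '000_post', 'url': 'https://fa.ke/000/post'}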
Example #12
    def base_object(self, obj):
        """Returns the 'base' silo object that an object operates on.

        For example, if the object is a comment, this returns the post that it's
        a comment on. If it's an RSVP, this returns the event. The id in the
        returned object is silo-specific, ie not a tag URI.

        Subclasses may override this.

        Args:
          obj: ActivityStreams object

        Returns: dict, minimal ActivityStreams object. Usually has at least id;
          may also have url, author, etc.
        """
        # look at in-reply-tos first, then objects (for likes and reposts).
        # technically, the ActivityStreams 'object' field is always supposed to be
        # singular, but microformats2.json_to_object() sometimes returns activities
        # that have a list value, e.g. likes or reposts of multiple objects.
        candidates = []
        for field in ("inReplyTo", "object"):
            objs = obj.get(field, [])
            if isinstance(objs, dict):
                candidates.append(objs)
            else:
                candidates += objs

        for base_obj in candidates:
            parsed_id = util.parse_tag_uri(base_obj.get("id", ""))
            if parsed_id:
                domain = parsed_id[0]
            else:
                domain = util.domain_from_link(base_obj.get("url", ""))
            if domain == self.DOMAIN:
                break
        else:
            return {}

        base_obj = copy.deepcopy(base_obj)
        id = base_obj.get("id")
        url = base_obj.get("url")

        if id:
            parsed = util.parse_tag_uri(id)
            if parsed:
                base_obj["id"] = parsed[1]
        elif url:
            path = urlparse.urlparse(url).path
            base_obj["id"] = path.rstrip("/").rsplit("/", 1)[-1]

        return base_obj
Example #13
  def user_to_actor(cls, user):
    """Converts a GitHub user to an ActivityStreams actor.

    Handles both v4 GraphQL and v3 REST API user objects.

    https://developer.github.com/v4/object/user/
    https://developer.github.com/v3/users/

    Args:
      user: dict, decoded JSON GitHub user

    Returns:
      an ActivityStreams actor dict, ready to be JSON-encoded
    """
    actor = cls._to_object(user)
    if not actor:
      return actor

    username = user.get('login')
    desc = user.get('bio') or user.get('description')

    actor.update({
      # TODO: orgs, bots
      'objectType': 'person',
      'displayName': user.get('name') or username,
      'username': username,
      'email': user.get('email'),
      'description': desc,
      'summary': desc,
      'image': {'url': user.get('avatarUrl') or user.get('avatar_url') or user.get('url')},
      'location': {'displayName': user.get('location')},
    })

    # extract web site links. extract_links uniquifies and preserves order
    urls = sum((util.extract_links(user.get(field)) for field in (
      'html_url',  # REST
      'url',  # both
      'websiteUrl',  # GraphQL
      'blog',  # REST
      'bio',   # both
    )), [])
    urls = [u for u in urls if util.domain_from_link(u) != 'api.github.com']
    if urls:
      actor['url'] = urls[0]
      if len(urls) > 1:
        actor['urls'] = [{'value': u} for u in urls]

    return cls.postprocess_object(actor)
Example #14
  def base_object(self, obj):
    """Returns the 'base' silo object that an object operates on.

    For example, if the object is a comment, this returns the post that it's a
    comment on. If it's an RSVP, this returns the event. The id in the returned
    object is silo-specific, ie not a tag URI.

    Subclasses may override this.

    Args:
      obj: ActivityStreams object

    Returns:
      dict, minimal ActivityStreams object. Usually has at least id; may
      also have url, author, etc.
    """
    # look at in-reply-tos first, then objects (for likes and reposts).
    # technically, the ActivityStreams 'object' field is always supposed to be
    # singular, but microformats2.json_to_object() sometimes returns activities
    # that have a list value, e.g. likes or reposts of multiple objects.
    candidates = []
    for field in ('inReplyTo', 'object', 'target'):
      candidates += util.get_list(obj, field)

    for base_obj in candidates:
      parsed_id = util.parse_tag_uri(base_obj.get('id', ''))
      if parsed_id:
        domain = parsed_id[0]
      else:
        domain = util.domain_from_link(base_obj.get('url', ''))
      if domain == self.DOMAIN:
        break
    else:
      return {}

    base_obj = copy.deepcopy(base_obj)
    id = base_obj.get('id')
    url = base_obj.get('url')

    if id:
      parsed = util.parse_tag_uri(id)
      if parsed:
        base_obj['id'] = parsed[1]
    elif url:
      base_obj['id'] = self.base_id(url)

    return base_obj
Example #15
def webmention_endpoint_cache_key(url):
    """Returns cache key for a cached webmention endpoint for a given URL.

    Example: 'W https snarfed.org /'

    If the URL is the home page, ie path is / , the key includes a / at the end,
    so that we cache webmention endpoints for home pages separately from other
    pages. https://github.com/snarfed/bridgy/issues/701
    """
    domain = util.domain_from_link(url)
    scheme = urllib.parse.urlparse(url).scheme

    parts = ['W', scheme, domain]
    if urllib.parse.urlparse(url).path in ('', '/'):
        parts.append('/')

    return ' '.join(parts)
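The home page special case can be seen directly (again assuming util.domain_from_link strips scheme and path):

print(webmention_endpoint_cache_key('https://snarfed.org/'))      # W https snarfed.org /
print(webmention_endpoint_cache_key('https://snarfed.org/post'))  # W https snarfed.org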
Example #16
def webmention_endpoint_cache_key(url):
  """Returns memcache key for a cached webmention endpoint for a given URL.

  Example: 'W https snarfed.org /'

  If the URL is the home page, ie path is / , the key includes a / at the end,
  so that we cache webmention endpoints for home pages separately from other pages.
  https://github.com/snarfed/bridgy/issues/701
  """
  domain = util.domain_from_link(url)
  scheme = urlparse.urlparse(url).scheme

  parts = ['W', scheme, domain]
  if urlparse.urlparse(url).path in ('', '/'):
    parts.append('/')

  return ' '.join(parts)
Example #17
def send(activity, inbox_url, user_domain):
    """Sends an ActivityPub request to an inbox.

    Args:
      activity: dict, AS2 activity
      inbox_url: string
      user_domain: string, domain of the bridgy fed user sending the request

    Returns:
      requests.Response
    """
    logging.info(
        f'Sending AP request from {user_domain}: {json_dumps(activity, indent=2)}')

    # prepare HTTP Signature (required by Mastodon)
    # https://w3c.github.io/activitypub/#authorization
    # https://tools.ietf.org/html/draft-cavage-http-signatures-07
    # https://github.com/tootsuite/mastodon/issues/4906#issuecomment-328844846
    acct = 'acct:%s@%s' % (user_domain, user_domain)
    key = MagicKey.get_or_create(user_domain)
    auth = HTTPSignatureAuth(secret=key.private_pem(),
                             key_id=acct,
                             algorithm='rsa-sha256',
                             sign_header='signature',
                             headers=('Date', 'Digest', 'Host'))

    # deliver to inbox
    body = json_dumps(activity).encode()
    headers = {
        'Content-Type': common.CONTENT_TYPE_AS2,
        # required for HTTP Signature
        # https://tools.ietf.org/html/draft-cavage-http-signatures-07#section-2.1.3
        'Date': datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT'),
        # required by Mastodon
        # https://github.com/tootsuite/mastodon/pull/14556#issuecomment-674077648
        'Digest': 'SHA-256=' + b64encode(sha256(body).digest()).decode(),
        'Host': util.domain_from_link(inbox_url),
    }
    return common.requests_post(inbox_url,
                                data=body,
                                auth=auth,
                                headers=headers)
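The Digest header is just a base64-encoded SHA-256 of the request body; a minimal standalone check of the same computation (sample body is hypothetical):

from base64 import b64encode
from hashlib import sha256

body = b'{"type": "Note"}'  # sample body
print('SHA-256=' + b64encode(sha256(body).digest()).decode())
# prints the value the 'Digest' header above would carry for this body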
Example #18
    def __init__(self, instance, access_token, user_id=None):
        """Constructor.

        If user_id is not provided, it will be fetched via the API.

        Args:
          instance: string, base URL of Mastodon instance, eg https://mastodon.social/
          access_token: string, OAuth access token
          user_id: string or integer, optional, current user's id (not username!) on
            this instance
        """
        assert instance
        self.instance = self.BASE_URL = instance
        assert access_token
        self.access_token = access_token
        self.DOMAIN = util.domain_from_link(instance)

        if user_id:
            self.user_id = user_id
        else:
            creds = self._get(API_VERIFY_CREDENTIALS)
            self.user_id = creds['id']
Example #19
def redirect_unwrap(val):
    """Removes our redirect wrapping from a URL, if it's there.

    url may be a string, dict, or list. dicts and lists are unwrapped
    recursively.

    Strings that aren't wrapped URLs are left unchanged.
    """
    if isinstance(val, dict):
        return {k: redirect_unwrap(v) for k, v in val.items()}

    elif isinstance(val, list):
        return [redirect_unwrap(v) for v in val]

    elif isinstance(val, basestring):
        if val.startswith(REDIRECT_PREFIX):
            return val[len(REDIRECT_PREFIX):]
        elif val.startswith(appengine_config.HOST_URL):
            return util.follow_redirects(
                util.domain_from_link(urlparse.urlparse(val).path.strip('/')),
                cache=memcache).url

    return val
Example #20
    def original_post_discovery(activity, domains=None, cache=None, include_redirect_sources=True, **kwargs):
        """Discovers original post links.

        This is a variation on http://indiewebcamp.com/original-post-discovery .
        It differs in that it finds multiple candidate links instead of one, and
        it doesn't bother looking for MF2 (etc) markup because the silos don't
        let you input it. More background:
        https://github.com/snarfed/bridgy/issues/51#issuecomment-136018857

        Original post candidates come from the upstreamDuplicates, attachments,
        and tags fields, as well as links and
        permashortlinks/permashortcitations in the text content.

        Args:
          activity: activity dict
          domains: optional sequence of domains. If provided, only links to
            these domains will be considered original and stored in
            upstreamDuplicates. (Permashortcitations are exempt.)
          cache: optional, a cache object for storing resolved URL redirects.
            Passed to follow_redirects().
          include_redirect_sources: boolean, whether to include URLs that
            redirect as well as their final destination URLs
          kwargs: passed to requests.head() when following redirects

        Returns: ([string original post URLs], [string mention URLs]) tuple
        """
        obj = activity.get("object") or activity
        content = obj.get("content", "").strip()

        # find all candidate URLs
        tags = [
            t.get("url")
            for t in obj.get("attachments", []) + obj.get("tags", [])
            if t.get("objectType") in ("article", "mention", None)
        ]
        candidates = tags + util.extract_links(content) + obj.get("upstreamDuplicates", [])

        # Permashortcitations (http://indiewebcamp.com/permashortcitation) are short
        # references to canonical copies of a given (usually syndicated) post, of
        # the form (DOMAIN PATH). We consider them an explicit original post link.
        candidates += [match.expand(r"http://\1/\2") for match in Source._PERMASHORTCITATION_RE.finditer(content)]

        candidates = set(
            filter(
                None,
                (
                    util.clean_url(url)
                    for url in candidates
                    # heuristic: ellipsized URLs are probably incomplete, so omit them.
                    if url and not url.endswith("...") and not url.endswith(u"…")
                ),
            )
        )

        # check for redirects and add their final URLs
        redirects = {}  # maps final URL to original URL for redirects
        for url in list(candidates):
            resolved = follow_redirects(url, cache=cache, **kwargs)
            if resolved.url != url and resolved.headers.get("content-type", "").startswith("text/html"):
                redirects[resolved.url] = url
                candidates.add(resolved.url)

        # use domains to determine which URLs are original post links vs mentions
        originals = set()
        mentions = set()
        for url in util.dedupe_urls(candidates):
            if url in redirects.values():
                # this is a redirected original URL. postpone and handle it when we hit
                # its final URL so that we know the final domain.
                continue
            which = originals if not domains or util.domain_from_link(url) in domains else mentions
            which.add(url)
            redirected_from = redirects.get(url)
            if redirected_from and include_redirect_sources:
                which.add(redirected_from)

        logging.info("Original post discovery found original posts %s, mentions %s", originals, mentions)
        return originals, mentions
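A sketch of the inputs and the expected classification (URLs hypothetical; Source._PERMASHORTCITATION_RE itself isn't shown above):

activity = {'object': {
    'content': 'new post! also see (snarfed.org p/123)',
    'upstreamDuplicates': ['https://snarfed.org/2014/a-post'],
    'tags': [{'objectType': 'article', 'url': 'https://other.example/copy'}],
}}
# with domains=['snarfed.org'], original_post_discovery(activity) would
# classify the upstreamDuplicate and the expanded permashortcitation
# http://snarfed.org/p/123 as originals, and https://other.example/copy
# as a mention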
Example #21
def host_url(handler):
  domain = util.domain_from_link(handler.request.host_url)
  return HOST_URL if domain in OTHER_DOMAINS else handler.request.host_url
Example #22
def host_url(handler):
    domain = util.domain_from_link(handler.request.host_url)
    return (HOST_URL if util.domain_or_parent_in(domain, OTHER_DOMAINS) else
            handler.request.host_url)
Example #23
def request_headers(url=None, source=None):
  if (url and util.domain_from_link(url) in CONNEG_DOMAINS or
      source and source.bridgy_path() in CONNEG_PATHS):
    return REQUEST_HEADERS_CONNEG

  return REQUEST_HEADERS
Example #24
def send_webmentions(activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.
    Args:
      activity_wrapped: dict, AS1 activity
      response_props: passed through to the newly created Responses
    """
    activity = redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(f'{verb} activities are not supported yet.')

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(request.host_url):
                targets.append(redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error("Couldn't find original post URL")
    if not targets:
        error(
            "Couldn't find any target URLs in inReplyTo, object, or mention tags"
        )

    # send webmentions and store Responses
    errors = []  # stores (code, body) tuples
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info(
                f'Skipping same-domain webmention from {source} to {target}')
            continue

        response = Response(source=source,
                            target=target,
                            direction='in',
                            **response_props)
        response.put()
        wm_source = (response.proxy_url() if
                     verb in ('follow', 'like', 'share') or proxy else source)
        logging.info(f'Sending webmention from {wm_source} to {target}')

        try:
            endpoint = webmention.discover(target, headers=HEADERS).endpoint
            if endpoint:
                webmention.send(endpoint, wm_source, target, headers=HEADERS)
                response.status = 'complete'
                logging.info('Success!')
            else:
                response.status = 'ignored'
                logging.info('Ignoring.')
        except BaseException as e:
            errors.append(util.interpret_http_exception(e))
        response.put()

    if errors:
        msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
        error(msg, status=int(errors[0][0] or 502))
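For example, an incoming like along these lines (URLs hypothetical) would produce a single webmention:

activity = {
    'verb': 'like',
    'id': 'https://mastodon.example/users/alice#likes/1',
    'object': 'https://bob.example/post',
}
# send_webmentions(activity) discovers bob.example's webmention endpoint
# and sends a webmention targeting https://bob.example/post, using the
# stored Response's proxy URL as the source (since the verb is 'like')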
Example #25
    def original_post_discovery(activity,
                                domains=None,
                                cache=None,
                                include_redirect_sources=True,
                                **kwargs):
        """Discovers original post links.

        This is a variation on http://indiewebcamp.com/original-post-discovery .
        It differs in that it finds multiple candidate links instead of one, and
        it doesn't bother looking for MF2 (etc) markup because the silos don't
        let you input it. More background:
        https://github.com/snarfed/bridgy/issues/51#issuecomment-136018857

        Original post candidates come from the upstreamDuplicates, attachments,
        and tags fields, as well as links and
        permashortlinks/permashortcitations in the text content.

        Args:
          activity: activity dict
          domains: optional sequence of domains. If provided, only links to
            these domains will be considered original and stored in
            upstreamDuplicates. (Permashortcitations are exempt.)
          cache: optional, a cache object for storing resolved URL redirects.
            Passed to follow_redirects().
          include_redirect_sources: boolean, whether to include URLs that
            redirect as well as their final destination URLs
          kwargs: passed to requests.head() when following redirects

        Returns:
          ([string original post URLs], [string mention URLs]) tuple
        """
        obj = activity.get('object') or activity
        content = obj.get('content', '').strip()

        # find all candidate URLs
        tags = [
            t.get('url')
            for t in obj.get('attachments', []) + obj.get('tags', [])
            if t.get('objectType') in ('article', 'mention', None)
        ]
        candidates = tags + util.extract_links(content) + obj.get(
            'upstreamDuplicates', [])

        # Permashortcitations (http://indiewebcamp.com/permashortcitation) are short
        # references to canonical copies of a given (usually syndicated) post, of
        # the form (DOMAIN PATH). We consider them an explicit original post link.
        candidates += [
            match.expand(r'http://\1/\2')
            for match in Source._PERMASHORTCITATION_RE.finditer(content)
        ]

        candidates = set(
            filter(
                None,
                (
                    util.clean_url(url) for url in candidates
                    # heuristic: ellipsized URLs are probably incomplete, so omit them.
                    if url and not url.endswith('...')
                    and not url.endswith(u'…'))))

        # check for redirects and add their final URLs
        redirects = {}  # maps final URL to original URL for redirects
        for url in list(candidates):
            resolved = util.follow_redirects(url, cache=cache, **kwargs)
            if (resolved.url != url and resolved.headers.get(
                    'content-type', '').startswith('text/html')):
                redirects[resolved.url] = url
                candidates.add(resolved.url)

        # use domains to determine which URLs are original post links vs mentions
        originals = set()
        mentions = set()
        for url in util.dedupe_urls(candidates):
            if url in redirects.values():
                # this is a redirected original URL. postpone and handle it when we hit
                # its final URL so that we know the final domain.
                continue
            domain = util.domain_from_link(url)
            which = (originals if not domains or util.domain_or_parent_in(
                domain, domains) else mentions)
            which.add(url)
            redirected_from = redirects.get(url)
            if redirected_from and include_redirect_sources:
                which.add(redirected_from)

        logging.info(
            'Original post discovery found original posts %s, mentions %s',
            originals, mentions)
        return originals, mentions
Example #26
def request_headers(url=None, source=None):
    if (url and util.domain_from_link(url) in CONNEG_DOMAINS
            or source and source.bridgy_path() in CONNEG_PATHS):
        return REQUEST_HEADERS_CONNEG

    return {}
Example #27
def host_url(path_query=None):
    domain = util.domain_from_link(request.host_url)
    base = (HOST_URL if util.domain_or_parent_in(domain, OTHER_DOMAINS) else
            request.host_url)
    return urllib.parse.urljoin(base, path_query)
Example #28
def send_webmentions(handler, activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.
    Args:
      handler: RequestHandler
      activity_wrapped: dict, AS1 activity
      response_props: passed through to the newly created Responses
    """
    activity = common.redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(handler, '%s activities are not supported yet.' % verb)

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(appengine_config.HOST_URL):
                targets.append(redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error(handler, "Couldn't find original post URL")
    if not targets:
        error(handler, "Couldn't find any target URLs in inReplyTo, object, or mention tags")

    # send webmentions and store Responses
    errors = []
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info('Skipping same-domain webmention from %s to %s',
                         source, target)
            continue

        response = Response(source=source, target=target, direction='in',
                            **response_props)
        response.put()
        wm_source = (response.proxy_url()
                     if verb in ('follow', 'like', 'share') or proxy
                     else source)
        logging.info('Sending webmention from %s to %s', wm_source, target)

        wm = send.WebmentionSend(wm_source, target)
        if wm.send(headers=HEADERS):
            logging.info('Success: %s', wm.response)
            response.status = 'complete'
        else:
            logging.warning('Failed: %s', wm.error)
            errors.append(wm.error)
            response.status = 'error'
        response.put()

    if errors:
        msg = 'Errors:\n' + '\n'.join(json.dumps(e, indent=2) for e in errors)
        error(handler, msg, status=errors[0].get('http_status'))