Python get_text примеры использования

Язык программирования: Python

Пространство имен/Пакет: granary.microformats2

Метод/Функция: get_text

Примеров на hotexamples.com: 5

Python get_text - 5 примеров найдено. Это лучшие примеры Python кода для granary.microformats2.get_text, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

    def attempt_single_item(self, item):
        """Attempts to preview or publish a single mf2 item.

    Args:
      item: mf2 item dict from mf2py

    Returns:
      CreationResult
    """
        self.maybe_inject_silo_content(item)
        obj = microformats2.json_to_object(item)

        ignore_formatting = self.ignore_formatting(item)
        if ignore_formatting:
            prop = microformats2.first_props(item.get('properties', {}))
            content = microformats2.get_text(prop.get('content'))
            if content:
                obj['content'] = content.strip()

        # which original post URL to include? in order of preference:
        # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
        # 2. original user-provided URL if it redirected
        # 3. u-url if available
        # 4. actual final fetched URL
        if self.shortlink:
            obj['url'] = self.shortlink
        elif self.source_url() != self.fetched.url:
            obj['url'] = self.source_url()
        elif 'url' not in obj:
            obj['url'] = self.fetched.url
        logging.debug('Converted to ActivityStreams object: %s',
                      json.dumps(obj, indent=2))

        # posts and comments need content
        obj_type = obj.get('objectType')
        if obj_type in ('note', 'article', 'comment'):
            if (not obj.get('content') and not obj.get('summary')
                    and not obj.get('displayName')):
                return gr_source.creation_result(
                    abort=False,
                    error_plain='Could not find content in %s' %
                    self.fetched.url,
                    error_html=
                    'Could not find <a href="http://microformats.org/">content</a> in %s'
                    % self.fetched.url)

        self.preprocess(obj)

        include_link = self.include_link(item)

        if not self.authorize():
            return gr_source.creation_result(abort=True)

        # RIP Facebook comments/likes. https://github.com/snarfed/bridgy/issues/350
        if (isinstance(self.source, FacebookPage)
                and (obj_type == 'comment' or obj.get('verb') == 'like')):
            return gr_source.creation_result(
                abort=True,
                error_plain=
                'Facebook comments and likes are no longer supported. :(',
                error_html=
                '<a href="https://github.com/snarfed/bridgy/issues/350">'
                'Facebook comments and likes are no longer supported.</a> :(')

        if self.PREVIEW:
            result = self.source.gr_source.preview_create(
                obj,
                include_link=include_link,
                ignore_formatting=ignore_formatting)
            self.entity.published = result.content or result.description
            if not self.entity.published:
                return result  # there was an error
            state = {
                'source_key': self.source.key.urlsafe(),
                'source_url': self.source_url(),
                'target_url': self.target_url(),
                'include_link': include_link,
            }
            vars = {
                'source': self.preprocess_source(self.source),
                'preview': result.content,
                'description': result.description,
                'webmention_endpoint':
                self.request.host_url + '/publish/webmention',
                'state': self.encode_state_parameter(state),
            }
            vars.update(state)
            logging.info('Rendering preview with template vars %s',
                         pprint.pformat(vars))
            return gr_source.creation_result(
                template.render('templates/preview.html', vars))

        else:
            result = self.source.gr_source.create(
                obj,
                include_link=include_link,
                ignore_formatting=ignore_formatting)
            self.entity.published = result.content
            if not result.content:
                return result  # there was an error
            if 'url' not in self.entity.published:
                self.entity.published['url'] = obj.get('url')
            self.entity.type = self.entity.published.get(
                'type') or models.get_type(obj)
            self.entity.type_label = self.source.TYPE_LABELS.get(
                self.entity.type)
            self.response.headers['Content-Type'] = 'application/json'
            logging.info('Returning %s',
                         json.dumps(self.entity.published, indent=2))
            self.response.headers['Location'] = self.entity.published[
                'url'].encode('utf-8')
            self.response.status = 201
            return gr_source.creation_result(
                json.dumps(self.entity.published, indent=2))

Пример #2

Показать файл

  def attempt_single_item(self, item):
    """Attempts to preview or publish a single mf2 item.

    Args:
      item: mf2 item dict from mf2py

    Returns:
      CreationResult
    """
    self.maybe_inject_silo_content(item)
    obj = microformats2.json_to_object(item)

    ignore_formatting = self.ignore_formatting(item)
    if ignore_formatting:
      prop = microformats2.first_props(item.get('properties', {}))
      content = microformats2.get_text(prop.get('content'))
      if content:
        obj['content'] = content.strip()

    # which original post URL to include? in order of preference:
    # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
    # 2. original user-provided URL if it redirected
    # 3. u-url if available
    # 4. actual final fetched URL
    if self.shortlink:
      obj['url'] = self.shortlink
    elif self.source_url() != self.fetched.url:
      obj['url'] = self.source_url()
    elif 'url' not in obj:
      obj['url'] = self.fetched.url
    logging.debug('Converted to ActivityStreams object: %s', json_dumps(obj, indent=2))

    # posts and comments need content
    obj_type = obj.get('objectType')
    if obj_type in ('note', 'article', 'comment'):
      if (not obj.get('content') and not obj.get('summary') and
          not obj.get('displayName')):
        return gr_source.creation_result(
          abort=False,
          error_plain='Could not find content in %s' % self.fetched.url,
          error_html='Could not find <a href="http://microformats.org/">content</a> in %s' % self.fetched.url)

    self.preprocess(obj)

    include_link = self.include_link(item)

    if not self.authorize():
      return gr_source.creation_result(abort=True)

    if self.PREVIEW:
      result = self.source.gr_source.preview_create(
        obj, include_link=include_link, ignore_formatting=ignore_formatting)
      previewed = result.content or result.description
      if self.entity.type == 'preview':
        self.entity.published = previewed
      if not previewed:
        return result  # there was an error
      return self._render_preview(result, include_link=include_link)

    else:
      result = self.source.gr_source.create(
        obj, include_link=include_link, ignore_formatting=ignore_formatting)
      self.entity.published = result.content
      if not result.content:
        return result  # there was an error
      if 'url' not in self.entity.published:
        self.entity.published['url'] = obj.get('url')
      self.entity.type = self.entity.published.get('type') or models.get_type(obj)
      self.response.headers['Content-Type'] = 'application/json'
      logging.info('Returning %s', json_dumps(self.entity.published, indent=2))
      self.response.headers['Location'] = self.entity.published['url']
      self.response.status = 201
      return gr_source.creation_result(
        json_dumps(self.entity.published, indent=2))

Пример #3

Показать файл

Файл: publish.py Проект: snarfed/bridgy

  def attempt_single_item(self, item):
    """Attempts to preview or publish a single mf2 item.

    Args:
      item: mf2 item dict from mf2py

    Returns:
      CreationResult
    """
    self.maybe_inject_silo_content(item)
    obj = microformats2.json_to_object(item)

    ignore_formatting = self.ignore_formatting(item)
    if ignore_formatting:
      prop = microformats2.first_props(item.get('properties', {}))
      content = microformats2.get_text(prop.get('content'))
      if content:
        obj['content'] = content.strip()

    # which original post URL to include? in order of preference:
    # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
    # 2. original user-provided URL if it redirected
    # 3. u-url if available
    # 4. actual final fetched URL
    if self.shortlink:
      obj['url'] = self.shortlink
    elif self.source_url() != self.fetched.url:
      obj['url'] = self.source_url()
    elif 'url' not in obj:
      obj['url'] = self.fetched.url
    logging.debug('Converted to ActivityStreams object: %s', json.dumps(obj, indent=2))

    # posts and comments need content
    obj_type = obj.get('objectType')
    if obj_type in ('note', 'article', 'comment'):
      if (not obj.get('content') and not obj.get('summary') and
          not obj.get('displayName')):
        return gr_source.creation_result(
          abort=False,
          error_plain='Could not find content in %s' % self.fetched.url,
          error_html='Could not find <a href="http://microformats.org/">content</a> in %s' % self.fetched.url)

    self.preprocess(obj)

    include_link = self.include_link(item)

    if not self.authorize():
      return gr_source.creation_result(abort=True)

    # RIP Facebook.
    # https://github.com/snarfed/bridgy/issues/817
    # https://github.com/snarfed/bridgy/issues/350
    verb = obj.get('verb')
    if isinstance(self.source, FacebookPage):
      return gr_source.creation_result(
        abort=True,
        error_plain='Facebook is no longer supported. So long, and thanks for all the fish!',
        error_html='<a href="https://brid.gy/about#rip-facebook">Facebook is no longer supported. So long, and thanks for all the fish!</a>')

    if self.PREVIEW:
      result = self.source.gr_source.preview_create(
        obj, include_link=include_link, ignore_formatting=ignore_formatting)
      self.entity.published = result.content or result.description
      if not self.entity.published:
        return result  # there was an error
      return self._render_preview(result, include_link=include_link)

    else:
      result = self.source.gr_source.create(
        obj, include_link=include_link, ignore_formatting=ignore_formatting)
      self.entity.published = result.content
      if not result.content:
        return result  # there was an error
      if 'url' not in self.entity.published:
        self.entity.published['url'] = obj.get('url')
      self.entity.type = self.entity.published.get('type') or models.get_type(obj)
      self.response.headers['Content-Type'] = 'application/json'
      logging.info('Returning %s', json.dumps(self.entity.published, indent=2))
      self.response.headers['Location'] = self.entity.published['url'].encode('utf-8')
      self.response.status = 201
      return gr_source.creation_result(
        json.dumps(self.entity.published, indent=2))

Пример #4

Показать файл

Файл: publish.py Проект: lcorbasson/bridgy

  def attempt_single_item(self, item):
    """Attempts to preview or publish a single mf2 item.

    Args:
      item: mf2 item dict from mf2py

    Returns:
      CreationResult
    """
    self.maybe_inject_silo_content(item)
    obj = microformats2.json_to_object(item)

    ignore_formatting = self.ignore_formatting(item)
    if ignore_formatting:
      prop = microformats2.first_props(item.get('properties', {}))
      content = microformats2.get_text(prop.get('content'))
      if content:
        obj['content'] = content.strip()

    # which original post URL to include? in order of preference:
    # 1. rel-shortlink (background: https://github.com/snarfed/bridgy/issues/173)
    # 2. original user-provided URL if it redirected
    # 3. u-url if available
    # 4. actual final fetched URL
    if self.shortlink:
      obj['url'] = self.shortlink
    elif self.source_url() != self.fetched.url:
      obj['url'] = self.source_url()
    elif 'url' not in obj:
      obj['url'] = self.fetched.url
    logging.debug('Converted to ActivityStreams object: %s', json.dumps(obj, indent=2))

    # posts and comments need content
    obj_type = obj.get('objectType')
    if obj_type in ('note', 'article', 'comment'):
      if (not obj.get('content') and not obj.get('summary') and
          not obj.get('displayName')):
        return gr_source.creation_result(
          abort=False,
          error_plain='Could not find content in %s' % self.fetched.url,
          error_html='Could not find <a href="http://microformats.org/">content</a> in %s' % self.fetched.url)

    self.preprocess(obj)

    omit_link = self.omit_link(item)

    if not self.authorize():
      return gr_source.creation_result(abort=True)

    # RIP Facebook comments/likes. https://github.com/snarfed/bridgy/issues/350
    if (isinstance(self.source, FacebookPage) and
        (obj_type == 'comment' or obj.get('verb') == 'like')):
      return gr_source.creation_result(
        abort=True,
        error_plain='Facebook comments and likes are no longer supported. :(',
        error_html='<a href="https://github.com/snarfed/bridgy/issues/350">'
                   'Facebook comments and likes are no longer supported.</a> :(')

    if self.PREVIEW:
      result = self.source.gr_source.preview_create(
        obj, include_link=not omit_link, ignore_formatting=ignore_formatting)
      self.entity.published = result.content or result.description
      if not self.entity.published:
        return result  # there was an error
      state = {
        'source_key': self.source.key.urlsafe(),
        'source_url': self.source_url(),
        'target_url': self.target_url(),
        'bridgy_omit_link': omit_link,
      }
      vars = {'source': self.preprocess_source(self.source),
              'preview': result.content,
              'description': result.description,
              'webmention_endpoint': self.request.host_url + '/publish/webmention',
              'state': self.encode_state_parameter(state),
              }
      vars.update(state)
      logging.info('Rendering preview with template vars %s', pprint.pformat(vars))
      return gr_source.creation_result(
        template.render('templates/preview.html', vars))

    else:
      result = self.source.gr_source.create(obj, include_link=not omit_link,
                                            ignore_formatting=ignore_formatting)
      self.entity.published = result.content
      if not result.content:
        return result  # there was an error
      if 'url' not in self.entity.published:
        self.entity.published['url'] = obj.get('url')
      self.entity.type = self.entity.published.get('type') or models.get_type(obj)
      self.entity.type_label = self.source.TYPE_LABELS.get(self.entity.type)
      self.response.headers['Content-Type'] = 'application/json'
      logging.info('Returning %s', json.dumps(self.entity.published, indent=2))
      self.response.headers['Location'] = self.entity.published['url'].encode('utf-8')
      self.response.status = 201
      return gr_source.creation_result(
        json.dumps(self.entity.published, indent=2))

Пример #5

Показать файл

    def template_vars(self, domain=None, url=None):
        logging.debug(f'Headers: {list(request.headers.items())}')

        if domain.split('.')[-1] in NON_TLDS:
            error(f"{domain} doesn't look like a domain", status=404)

        # find representative h-card. try url, then url's home page, then domain
        urls = [f'http://{domain}/']
        if url:
            urls = [url, urllib.parse.urljoin(url, '/')] + urls

        for candidate in urls:
            resp = common.requests_get(candidate)
            parsed = util.parse_html(resp)
            mf2 = util.parse_mf2(parsed, url=resp.url)
            # logging.debug(f'Parsed mf2 for {resp.url}: {json_dumps(mf2, indent=2)}')
            hcard = mf2util.representative_hcard(mf2, resp.url)
            if hcard:
                logging.info(
                    f'Representative h-card: {json_dumps(hcard, indent=2)}')
                break
        else:
            error(
                f"didn't find a representative h-card (http://microformats.org/wiki/representative-hcard-parsing) on {resp.url}"
            )

        logging.info(f'Generating WebFinger data for {domain}')
        key = models.MagicKey.get_or_create(domain)
        props = hcard.get('properties', {})
        urls = util.dedupe_urls(props.get('url', []) + [resp.url])
        canonical_url = urls[0]

        acct = f'{domain}@{domain}'
        for url in urls:
            if url.startswith('acct:'):
                urluser, urldomain = util.parse_acct_uri(url)
                if urldomain == domain:
                    acct = f'{urluser}@{domain}'
                    logging.info(f'Found custom username: acct:{acct}')
                    break

        # discover atom feed, if any
        atom = parsed.find('link',
                           rel='alternate',
                           type=common.CONTENT_TYPE_ATOM)
        if atom and atom['href']:
            atom = urllib.parse.urljoin(resp.url, atom['href'])
        else:
            atom = 'https://granary.io/url?' + urllib.parse.urlencode(
                {
                    'input': 'html',
                    'output': 'atom',
                    'url': resp.url,
                    'hub': resp.url,
                })

        # discover PuSH, if any
        for link in resp.headers.get('Link', '').split(','):
            match = common.LINK_HEADER_RE.match(link)
            if match and match.group(2) == 'hub':
                hub = match.group(1)
            else:
                hub = 'https://bridgy-fed.superfeedr.com/'

        # generate webfinger content
        data = util.trim_nulls({
            'subject':
            'acct:' + acct,
            'aliases':
            urls,
            'magic_keys': [{
                'value': key.href()
            }],
            'links':
            sum(([{
                'rel': 'http://webfinger.net/rel/profile-page',
                'type': 'text/html',
                'href': url,
            }] for url in urls if url.startswith("http")), []) +
            [{
                'rel': 'http://webfinger.net/rel/avatar',
                'href': get_text(url),
            } for url in props.get('photo', [])] + [
                {
                    'rel': 'canonical_uri',
                    'type': 'text/html',
                    'href': canonical_url,
                },

                # ActivityPub
                {
                    'rel': 'self',
                    'type': common.CONTENT_TYPE_AS2,
                    # WARNING: in python 2 sometimes request.host_url lost port,
                    # http://localhost:8080 would become just http://localhost. no
                    # clue how or why. pay attention here if that happens again.
                    'href': f'{request.host_url}{domain}',
                },
                {
                    'rel': 'inbox',
                    'type': common.CONTENT_TYPE_AS2,
                    'href': f'{request.host_url}{domain}/inbox',
                },

                # OStatus
                {
                    'rel': 'http://schemas.google.com/g/2010#updates-from',
                    'type': common.CONTENT_TYPE_ATOM,
                    'href': atom,
                },
                {
                    'rel': 'hub',
                    'href': hub,
                },
                {
                    'rel': 'magic-public-key',
                    'href': key.href(),
                },
                {
                    'rel': 'salmon',
                    'href': f'{request.host_url}{domain}/salmon',
                }
            ]
        })
        logging.info(f'Returning WebFinger data: {json_dumps(data, indent=2)}')
        return data