示例#1
0
def create_link_preview(page: MetadataParser, page_meta: dict,
                        url: str) -> Optional[str]:
    """
    Build a text preview card for a linked page.

    The card starts with the bolded title, followed by the description and
    the URL; the title-cased page type and the image link are appended on
    their own lines when they are present.

    :param MetadataParser page: Parser object used to look up the image link.
    :param dict page_meta: Metadata handed to ``parse_scraped_metadata``.
    :param str url: URL that is embedded in the card.

    :returns: The preview text, or ``None`` when the title or description is
        missing or when any exception is raised (the exception is logged).
    """
    try:
        title, description, page_type = parse_scraped_metadata(page_meta)
        image = page.get_metadata_link(
            "image", allow_encoded_uri=True, require_public_global=True)

        # Without both a title and a description there is no card to build.
        if title is None or description is None:
            return None

        segments = [f"\n\n<b>{title}</b>\n{description}\n{url}"]
        if page_type:
            segments.append(f"\n{page_type.title()}")
        if image:
            segments.append(f"\n{image}")
        return "".join(segments)
    except Exception as exc:
        LOGGER.error(
            f"Unexpected error while generating link preview card: {exc}")
    return None
示例#2
0
def fetch_president_articles():
    """Scrape metadata for every article that has no information record yet.

    For each ``PresidentCandidateArticle`` whose ``information`` is null, the
    article URL is parsed with ``MetadataParser``; the title, description,
    site name and canonical URL (falling back to the article's own URL) are
    stored through ``update_or_create``, and the image link is passed to
    ``save_image_from_url`` together with the record's ``image`` field.

    Returns a dict with the number of records ``'created'`` and ``'updated'``.
    """
    from metadata_parser import MetadataParser

    counts = {'created': 0, 'updated': 0}

    pending = PresidentCandidateArticle.objects.filter(
        information__isnull=True)

    for article in pending:
        parsed = MetadataParser(url=article.url)

        fields = {
            'title': first_or_none(parsed.get_metadatas('title')),
            'description': first_or_none(parsed.get_metadatas('description')),
            'site': first_or_none(parsed.get_metadatas('site_name')),
            # Prefer the canonical URL; fall back to the stored article URL.
            'url': parsed.get_url_canonical() or article.url,
        }
        image_link = parsed.get_metadata_link('image')

        info, was_created = PresidentCandidateArticleInformation.objects.update_or_create(
            article=article,
            defaults=fields)

        save_image_from_url(info.image, image_link)

        counts['created' if was_created else 'updated'] += 1

    return counts
示例#3
0
        'http://hw.libsyn.com/p/d/d/6/dd6b0db2d4858640/ARIYNBF_107_JamesGunn.mp3?sid=78edb823ad1b62ff6f329d68bbb2cc6a&l_sid=35168&l_eid=&l_mid=2952818&expiration=1334720066&hwt=7acfe1754c8dedc4f134b473894c9208'
    ]
    for i in urls:
        a = MetadataParser(url=i)
        print(a.__dict__)

# Manual checks against live URLs. Sections guarded by ``if 0:`` are
# disabled; switch the guard to ``if 1:`` to run one. All output goes through
# single-argument ``print(...)`` calls, which are valid on Python 3 and print
# the same text on Python 2.
if 0:
    url = 'http://soundcloud.com/electricyouthmusic'
    a = MetadataParser(url=url)
    print(a.__dict__)

if 0:
    url = 'http://agrrrdog.blogspot.in/2016/06/remote-detection-of-users-av-via-flash.html'
    a = MetadataParser(url=url)
    pprint.pprint(a.metadata)
    # NOTE(review): the same lookup is issued twice; presumably to check that
    # repeated calls agree -- confirm or drop one.
    print(a.get_metadata_link('image', strategy=['og']))
    print(a.get_metadata_link('image', strategy=['og']))
    print(a.peername)

if 0:
    url = 'https://twitter.com/gaussian36/status/810919575172825088'
    a = MetadataParser(url=url, search_head_only=False)
    pprint.pprint(a.metadata)
    print(a.peername)

# Currently enabled section: parses a short link and dumps the parsed metadata
# and the parser's ``peername`` attribute.
if 1:
    url = 'http://nyp.st/2ikSU6N'
    a = MetadataParser(url=url, search_head_only=False)
    pprint.pprint(a.metadata)
    print(a.peername)
示例#4
0
def social_card_image(page_url):
    """Return the ``'image'`` metadata link for ``page_url``.

    The page is parsed with ``search_head_only=True`` and the link is looked
    up with the ``['og']`` strategy only.
    """
    return MetadataParser(
        url=page_url,
        search_head_only=True,
    ).get_metadata_link('image', strategy=['og'])
示例#5
0
        'http://hw.libsyn.com/p/d/d/6/dd6b0db2d4858640/ARIYNBF_107_JamesGunn.mp3?sid=78edb823ad1b62ff6f329d68bbb2cc6a&l_sid=35168&l_eid=&l_mid=2952818&expiration=1334720066&hwt=7acfe1754c8dedc4f134b473894c9208'
    ]
    for i in urls:
        a = MetadataParser(url=i)
        print(a.__dict__)

# Manual checks against live URLs. Sections guarded by ``if 0:`` are
# disabled; switch the guard to ``if 1:`` to run one. All output goes through
# single-argument ``print(...)`` calls, which are valid on Python 3 and print
# the same text on Python 2.
if 0:
    url = 'http://soundcloud.com/electricyouthmusic'
    a = MetadataParser(url=url)
    print(a.__dict__)

if 0:
    url = 'http://agrrrdog.blogspot.in/2016/06/remote-detection-of-users-av-via-flash.html'
    a = MetadataParser(url=url)
    pprint.pprint(a.metadata)
    # NOTE(review): the same lookup is issued twice; presumably to check that
    # repeated calls agree -- confirm or drop one.
    print(a.get_metadata_link('image', strategy=['og']))
    print(a.get_metadata_link('image', strategy=['og']))
    print(a.peername)

if 0:
    url = 'https://twitter.com/gaussian36/status/810919575172825088'
    a = MetadataParser(url=url, search_head_only=False)
    pprint.pprint(a.metadata)
    print(a.peername)

# Currently enabled section: parses a short link and dumps the parsed metadata
# and the parser's ``peername`` attribute.
if 1:
    url = 'http://nyp.st/2ikSU6N'
    a = MetadataParser(url=url, search_head_only=False)
    pprint.pprint(a.metadata)
    print(a.peername)