示例#1
0
def get_first_appearance(page: "metadata_parser.MetadataParser",
                         prop: str) -> "str | None":
    """
    Return the first element found for the property in the given page.

    The value is looked up with ``page.get_metadatas(prop)``.

    :param page: parsed page object exposing ``get_metadatas``.
    :param prop: name of the metadata property to look up.
    :return: the first item when the lookup yields a non-empty list;
        ``None`` when it yields an empty list or anything that is not a list.
    """
    prop_data = page.get_metadatas(prop)
    # Guard against an empty list as well: indexing it would raise IndexError.
    if isinstance(prop_data, list) and prop_data:
        return prop_data[0]
    return None
示例#2
0
def fetch_president_articles():
    """
    Collect page metadata for every candidate article lacking information.

    Each ``PresidentCandidateArticle`` whose ``information`` is null has its
    URL parsed with ``MetadataParser``. The title, description, site name and
    canonical URL (falling back to the article's own URL when the page gives
    none) are stored through ``update_or_create`` on
    ``PresidentCandidateArticleInformation``. The page's image link is then
    passed to ``save_image_from_url`` together with the record's ``image``
    field.

    :return: dict with the number of records ``'created'`` and ``'updated'``.
    """
    from metadata_parser import MetadataParser

    counts = {'created': 0, 'updated': 0}

    pending_articles = PresidentCandidateArticle.objects.filter(
        information__isnull=True)

    for article in pending_articles:
        page = MetadataParser(url=article.url)

        # Dict entries are evaluated top to bottom, so the page is queried
        # in the order title, description, site name, canonical URL.
        defaults = {
            'title': first_or_none(page.get_metadatas('title')),
            'description': first_or_none(page.get_metadatas('description')),
            'site': first_or_none(page.get_metadatas('site_name')),
            'url': page.get_url_canonical() or article.url,
        }
        image_url = page.get_metadata_link('image')

        manager = PresidentCandidateArticleInformation.objects
        information_obj, is_created = manager.update_or_create(
            article=article, defaults=defaults)

        save_image_from_url(information_obj.image, image_url)

        counts['created' if is_created else 'updated'] += 1

    return counts