Exemplo n.º 1
0
    def _new_publisher(self, feed):
        """Fetch or create the Publisher record matching a feed.

        The name is the first truthy value among the feed's 'publisher',
        'author' and 'contributors' entries (checked in that order), or
        'unknown' when none is set. It is passed through
        ``self._sanitize_title`` before the lookup.

        Returns the publisher object yielded by ``Publisher.get_or_create``.
        """
        for key in ('publisher', 'author', 'contributors'):
            name = feed.get(key)
            if name:
                break
        else:
            # No candidate key held a usable value.
            name = 'unknown'

        safe_name = self._sanitize_title(name)
        result = Publisher.get_or_create(name=safe_name)
        publisher, is_new = result
        message = 'Saved publisher "{}": new={}'.format(safe_name, is_new)
        print(message)
        return publisher
Exemplo n.º 2
0
    def _new_publisher(self, feed):
        """Return the Publisher for ``feed``, creating it when needed.

        Name candidates are read from the feed under 'publisher', 'author'
        and 'contributors', in that order; the first truthy one wins and
        'unknown' is used when there is none. The chosen name goes through
        ``self._sanitize_title`` and is then handed to
        ``Publisher.get_or_create``, whose first result is returned.
        """
        fallback_keys = ('publisher', 'author', 'contributors')
        # Lazy generator: later keys are only read if earlier ones are falsy.
        candidates = (feed.get(key) for key in fallback_keys)
        raw_name = next((value for value in candidates if value), 'unknown')

        safe_name = self._sanitize_title(raw_name)
        publisher, is_new = Publisher.get_or_create(name=safe_name)
        print('Saved publisher "{}": new={}'.format(safe_name, is_new))
        return publisher
def save_article(article_):
    """Build an Article from one result node and try to save it.

    ``article_`` is queried through a BeautifulSoup-style ``find`` API
    (assumption drawn from the calls below -- confirm against the caller).
    The image URL, title, link, summary, creation time and publisher name
    are read from the node, then an Article is constructed and saved.

    NotUniqueError and InvalidBSON raised by ``save()`` are ignored.
    """
    # Only the part of the image src after the 'url=' marker is kept.
    image_tag = article_.find(class_='img_box2').find('img')
    img_url = image_tag.attrs['src'].split('url=')[1]

    text_box = article_.find(class_='txt-box')

    # The heading link supplies both the title and the article address.
    heading_link = text_box.find('h4').find('a')
    title = heading_link.text
    article_url = heading_link.attrs['href']

    summary = text_box.find('p').text

    # The 's-p' element carries the timestamp ('t') and the publisher link.
    source_info = text_box.find(class_='s-p')
    timestamp = float(source_info.attrs['t'])
    create_at = datetime.fromtimestamp(timestamp)
    publisher_name = source_info.find('a').attrs['title']

    publisher = Publisher.get_or_create(publisher_name)
    article = Article(
        img_url=img_url,
        title=title,
        article_url=article_url,
        summary=summary,
        create_at=create_at,
        publisher=publisher,
    )
    try:
        article.save()
    except (NotUniqueError, InvalidBSON):
        # Best effort: these save failures are deliberately ignored
        # (presumably duplicates or undecodable payloads -- verify).
        return
def save_article(article_):
    """Extract the article fields from ``article_`` and store them.

    ``article_`` is navigated with ``find(...)`` calls in the style of a
    BeautifulSoup tag (assumption from usage here -- confirm with the
    caller). The collected fields are passed to ``Article`` as keyword
    arguments and the resulting object is saved.

    A NotUniqueError or InvalidBSON from ``save()`` is swallowed.
    """
    fields = {}

    # The stored image URL is whatever follows 'url=' in the img src.
    thumbnail = article_.find(class_='img_box2').find('img')
    fields['img_url'] = thumbnail.attrs['src'].split('url=')[1]

    text_box = article_.find(class_='txt-box')
    headline = text_box.find('h4').find('a')
    fields['title'] = headline.text
    fields['article_url'] = headline.attrs['href']
    fields['summary'] = text_box.find('p').text

    # 's-p' holds the 't' timestamp attribute and the publisher anchor.
    byline = text_box.find(class_='s-p')
    fields['create_at'] = datetime.fromtimestamp(float(byline.attrs['t']))
    publisher_name = byline.find('a').attrs['title']
    fields['publisher'] = Publisher.get_or_create(publisher_name)

    article = Article(**fields)
    try:
        article.save()
    except (NotUniqueError, InvalidBSON):
        # Intentionally ignored; the article is simply not stored.
        pass
Exemplo n.º 5
0
def save_article(article_):
    """Parse one result node into an Article and try to save it.

    ``article_`` is queried through a BeautifulSoup-style ``find`` API
    (assumption drawn from the calls below -- confirm against the caller).

    A node that cannot be parsed (missing element or attribute, or an
    unusable timestamp) is logged as a warning and skipped, so nothing is
    saved for it. NotUniqueError and InvalidBSON raised by ``save()`` are
    ignored.

    Returns:
        None.
    """
    # Function-scope import so the block does not rely on file-level imports.
    import logging

    try:
        # The image box is optional; without it the URL is left empty.
        img_box = article_.find(class_='img-box')
        if img_box is not None:
            img_url = img_box.find('a').attrs['href']
        else:
            img_url = ""

        text_box = article_.find(class_='txt-box')
        title_link = text_box.find('h3').find('a')
        title = title_link.text
        article_url = title_link.attrs['href']
        summary = text_box.find('p').text

        # 's-p' carries the 't' timestamp attribute and the publisher link.
        source_info = text_box.find(class_='s-p')
        create_at = datetime.fromtimestamp(float(source_info.attrs['t']))
        publisher_name = source_info.find('a').text
    except (AttributeError, KeyError, TypeError, ValueError,
            OverflowError, OSError):
        # Previously a bare ``except`` opened pdb here and execution then
        # fell through to ``article.save()`` with ``article`` unbound.
        logging.getLogger(__name__).warning(
            'Skipping article that could not be parsed', exc_info=True)
        return

    # Built outside the parsing ``try`` so that errors from the model layer
    # are not mistaken for malformed markup.
    article = Article(img_url=img_url, title=title, article_url=article_url,
                      summary=summary, create_at=create_at,
                      publisher=Publisher.get_or_create(publisher_name))
    try:
        article.save()
    except (NotUniqueError, InvalidBSON):
        # Best effort: these save failures are deliberately ignored.
        pass