Пример #1
0
    def _get_nfzm_article(self, url, date, issue):
        """Download one article page and build an ``Article`` from it.

        The page at ``url`` is fetched and parsed. Its title, body text and
        byline are copied onto a new ``Article`` that is attached to the
        medium with primary key 951 and to ``issue``.

        Args:
            url: Address of the article page to download.
            date: Value stored as the article's ``publication_date``.
            issue: Value stored as the article's ``issue``.

        Returns:
            The populated ``Article``. This method does not call ``save()``
            on it, but a ``Journalist`` row may be created as a side effect
            of ``get_or_create``.

        Raises:
            AttributeError: if the page has no ``<title>``, no
                ``<section id="articleContent">`` or no
                ``<span class="author">`` (the lookups return ``None``).
            IndexError: if the byline holds fewer ``<em>`` elements than the
                positions read below.
        """
        # presumably pk=951 is the medium this scraper targets -- TODO confirm
        medium = Medium.objects.get(pk=951)

        article = Article()
        article.medium = medium
        article.issue = issue
        article.url = url
        article.publication_date = date

        # NOTE(review): hard-coded session id; presumably required to read the
        # page. Consider moving it to configuration.
        response = requests.get(
            url, cookies={'PHPSESSID': 'l19dgbf6ticijmo9ka9osvufk0'})
        page = bs4.BeautifulSoup(response.content)

        # The page title is split on '-' and only the last segment is kept.
        article.title = page.title.string.split('-')[-1].strip()
        article.content = page.find('section', {'id': 'articleContent'}).text

        # Within the byline, the second <em> carries the role label and the
        # third one the person's name.
        byline = page.find('span', {'class': 'author'}).find_all('em')
        role = byline[1].text

        if u'南方周末记者' in role:
            # get_or_create returns the journalist whether it already existed
            # or was just created, so it is linked in both cases. (Previously
            # a newly created journalist was left unattached.)
            journalist, _created = Journalist.objects.get_or_create(
                medium=medium, name=byline[2].text.strip())
            article.author = journalist
        elif u'南方周末特约撰稿' in role:
            # Only the name is recorded for this role; no Journalist is
            # looked up or created.
            article.author_name = byline[2].text.strip()
        elif u'南方周末编辑部' in role:
            article.author_name = u'南方周末编辑部'

        # Debug output. A single parenthesised argument prints identically
        # with the Python 2 print statement and the Python 3 print function.
        print(article.author or article.author_name)
        return article