Пример #1
0
    def _get_zgqnb_article(self, url, date, page_name):
        """Scrape every article linked from one index page and store it.

        The page at ``url`` is fetched, each ``<a>`` inside the element with
        id ``titleList`` is followed, and the linked page's ``<h1>`` text and
        the ``<p>`` texts inside the element with id ``ozoom`` are extracted.
        An Article already stored for the same medium and URL only gets its
        content refreshed; otherwise a new Article is created with
        ``date`` as publication date and ``page_name`` as page.
        """
        print(url)
        medium = Medium.objects.get(pk=1836)
        index_soup = bs4.BeautifulSoup(requests.get(url).content)
        links = index_soup.find('div', {'id' : 'titleList'}).find_all('a')

        for link in links:
            article_url = urljoin(url, link.get('href'))
            soup = bs4.BeautifulSoup(requests.get(article_url).content)
            title = soup.find('h1').text
            print(title)

            # Body text: one line per paragraph of the "ozoom" container.
            paragraphs = soup.find('div', {'id' : 'ozoom'}).find_all('p')
            content = '\n'.join([paragraph.text for paragraph in paragraphs])

            stored = Article.objects.filter(medium=medium, url=article_url)
            if stored.count():
                # Already known: keep the stored metadata, refresh the text.
                article = stored.get()
            else:
                article = Article()
                article.medium = medium
                article.title = title
                article.url = article_url
                article.publication_date = date
                article.page = page_name

            article.content = content
            article.save()
Пример #2
0
    def _get_nfzm_article(self, url, date, issue):
        """Download one article page and build an (unsaved) Article from it.

        The title is the last ``-``-separated part of the page ``<title>``;
        the content is the text of the ``<section id="articleContent">``.
        The byline is read from the ``<em>`` tags inside
        ``<span class="author">``: the second ``<em>`` decides how the
        author is recorded and the third one carries the name.

        :param url: address of the article page
        :param date: value stored as ``publication_date``
        :param issue: value stored as ``issue``
        :returns: the populated Article; this method does not save it
        """
        medium = Medium.objects.get(pk=951)
        article = Article()
        article.medium = medium
        article.issue = issue
        article.url = url
        article.publication_date = date

        # NOTE(review): hard-coded session cookie; presumably required to
        # read the page, but it should come from configuration -- confirm.
        response = requests.get(url, cookies={'PHPSESSID': 'l19dgbf6ticijmo9ka9osvufk0'})
        soup = bs4.BeautifulSoup(response.content)
        article.title = soup.title.string.split('-')[-1].strip()
        article.content = soup.find('section', {'id' : 'articleContent'}).text

        byline = soup.find('span', {'class' : 'author'}).find_all('em')
        role = byline[1].text
        if u'南方周末记者' in role:
            journalist, _created = Journalist.objects.get_or_create(
                medium=medium, name=byline[2].text.strip())
            # Attach the journalist whether it already existed or was just
            # created; previously a freshly created one was never linked.
            article.author = journalist
        elif u'南方周末特约撰稿' in role:
            article.author_name = byline[2].text.strip()
        elif u'南方周末编辑部' in role:
            article.author_name = u'南方周末编辑部'

        print(article.author or article.author_name)
        return article
Пример #3
0
    def _get_whb_article(self, url, date, issue, page):
        """Scrape and store every article linked from one page listing.

        Each ``<a>`` inside the element with id ``BT`` on ``url`` is
        followed; links answering 404 are skipped.  An Article already stored
        for the same medium and URL is reused, otherwise a new one is created
        with ``date``, ``issue`` and the stripped text of ``page``.  Title
        and content are (re)written from the downloaded page in both cases.
        """
        medium = Medium.objects.get(pk=1399)
        listing = bs4.BeautifulSoup(requests.get(url).content)

        for link in listing.find('div', {'id' : 'BT'}).find_all('a'):
            article_page_url = urljoin(url, link.get('href'))
            response = requests.get(article_page_url)
            if response.status_code == 404:
                continue
            article_page = bs4.BeautifulSoup(response.content)

            stored = Article.objects.filter(medium=medium, url=article_page_url)
            if not stored.count():
                article = Article()
                article.medium = medium
                article.url = article_page_url
                article.publication_date = date
                article.page = page.text.strip()
                article.issue = issue
            else:
                article = stored.get()

            print(article_page_url)
            # Drop the fixed prefix that the page <title> carries.
            article.title = article_page.title.text.strip().replace(u'文汇报 - ', '')
            body = article_page.find('div', {'id' : 'articleText'}).text.strip()
            # A run of two spaces is turned into a line break plus indent.
            article.content = body.replace(u'  ', '\n  ')
            article.save()
Пример #4
0
def addArticle(request):
    """Create an Article from the POST data of ``request``.

    Reads ``title``, ``abstract``, ``categorie`` and ``content`` from
    ``request.POST`` and uses the requesting user as author.

    :returns: redirect to ``/articles/read`` when the save succeeds,
        redirect to ``/`` when saving raises.
    """
    article = Article()
    article.title = request.POST.get('title', '')
    article.abstract = request.POST.get('abstract', '')
    article.category_id = request.POST.get('categorie')
    article.content = request.POST.get('content', '')
    article.author = User.objects.get(id=request.user.id)
    try:
        article.save()
    except Exception:
        # Best effort: any save failure sends the user back to the root
        # page.  ``Exception`` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return HttpResponseRedirect('/')
    return HttpResponseRedirect('/articles/read')
Пример #5
0
def add_article(request):
    """Show the article form (non-POST) or create an article from it (POST).

    On a valid POST a new Article is created for ``request.user`` and the
    response of ``index(request)`` is returned.  On any other request, and
    on a POST whose form does not validate, ``add_article.html`` is rendered
    with the form so that validation errors can be displayed.
    """
    if request.method == 'POST':
        form = ArticleForm(request.POST)
        if form.is_valid():
            article = Article()
            article.create_user = request.user
            article.title = form.cleaned_data.get('title')
            # NOTE(review): newlines are stored as literal <br> markup; make
            # sure the template escapes the rest of the content -- confirm.
            article.content = form.cleaned_data.get('content').replace('\n', '<br>')
            node_name = form.cleaned_data.get('node')
            article.add_node(node_name)
            article.save()
            return index(request)
    else:
        form = ArticleForm()
    # Reached for non-POST requests and for invalid POSTs; previously an
    # invalid POST fell through and the view returned None.
    return render(request, 'add_article.html', {'form': form})
Пример #6
0
def add_article(request):
    """Render the article form, or save a new article when it is submitted.

    A POST with a valid form creates an Article owned by ``request.user``
    and returns ``index(request)``.  Every other case (non-POST request, or
    POST with an invalid form) renders ``add_article.html`` with the form.
    """
    if request.method == 'POST':
        form = ArticleForm(request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            article = Article()
            article.create_user = request.user
            article.title = cleaned.get('title')
            # Line breaks are persisted as <br> markup.
            article.content = cleaned.get('content').replace('\n', '<br>')
            article.add_node(cleaned.get('node'))
            article.save()
            return index(request)
    else:
        form = ArticleForm()
    # Also covers the invalid-POST case, which used to return None because
    # this render call was nested inside the ``else`` branch.
    return render(request, 'add_article.html', {'form': form})
Пример #7
0
def GenerateOneFakeData():
    """Build one Article filled with random placeholder values.

    The article is returned without being saved.  Title and brief come from
    ``get_random_str``; the counters are random integers; user and category
    are picked at random from the ``users`` and ``categories`` collections.
    """
    fake = Article()
    fake.title = get_random_str(name_str, 12)
    fake.brief = get_random_str(brief_str)
    fake.content = desc_str

    # Click, favor and comment counters each get a value in [10, 100].
    for counter in ('click_number', 'favor_number', 'comment_number'):
        setattr(fake, counter, randint(10, 100))
    fake.word_count = randint(100, 1000)
    fake.front_image = "articles/images/art.png"

    # User who created the article.
    user_index = randint(0, users.count() - 1)
    fake.user_id = users[user_index].id

    # Category of the article.
    category_index = randint(0, categories.count() - 1)
    fake.category_id = categories[category_index].id

    # Tags are not assigned here; the original attempt is kept for reference:
    # art.tags.set(ArticleTag(name=tags[randint(0,tags.count()-1)].name))
    return fake
Пример #8
0
def write(request):
    """Display the article form, or create an article from a valid POST.

    A valid POST saves a new Article for ``request.user``, creates its tags
    and redirects to ``/articles/``.  A non-POST request gets an empty form;
    an invalid POST gets the bound form back.
    """
    template = 'articles/write.html'
    if request.method != 'POST':
        return render(request, template, {'form': ArticleForm()})

    form = ArticleForm(request.POST)
    if not form.is_valid():
        return render(request, template, {'form': form})

    article = Article()
    article.create_user = request.user
    article.title = form.cleaned_data.get('title')
    article.content = form.cleaned_data.get('content')
    # Only the two known statuses are accepted; anything else leaves the
    # status attribute untouched.
    requested_status = form.cleaned_data.get('status')
    if requested_status in (Article.PUBLISHED, Article.DRAFT):
        article.status = requested_status
    article.save()
    article.create_tags(form.cleaned_data.get('tags'))
    return redirect('/articles/')
Пример #9
0
def write(request):
    """Handle the "write article" page.

    POST with a valid form: store a new Article owned by ``request.user``,
    create its tags, then redirect to ``/articles/``.  Otherwise render
    ``articles/write.html`` with either a blank form (non-POST) or the
    submitted form (invalid POST).
    """
    is_post = request.method == 'POST'
    form = ArticleForm(request.POST) if is_post else ArticleForm()

    if is_post and form.is_valid():
        data = form.cleaned_data
        article = Article()
        article.create_user = request.user
        article.title = data.get('title')
        article.content = data.get('content')
        status = data.get('status')
        # Unknown status values are ignored rather than stored.
        if status in [Article.PUBLISHED, Article.DRAFT]:
            article.status = status
        article.save()
        article.create_tags(data.get('tags'))
        return redirect('/articles/')

    return render(request, 'articles/write.html', {'form': form})
Пример #10
0
    def _get_qlwb_article(self, url, date, issue, page):
        """Download one article page and store or refresh its Article.

        When an Article for the same medium and URL is already stored, only
        its content is rewritten.  Otherwise a new Article is created with
        the title taken from the ``<td class="font01">`` element and with
        ``date``, ``page`` and ``issue`` as given.  The content always comes
        from the ``<span id="contenttext">`` element.
        """
        print(page)
        medium = Medium.objects.get(pk=1025)
        soup = bs4.BeautifulSoup(requests.get(url).content)

        stored = Article.objects.filter(medium=medium, url=url)
        if not stored.count():
            article = Article()
            article.medium = medium
            headline = soup.find('td', {'class' : 'font01'}).text.strip()
            article.title = headline.replace(u'  ', '\n  ')
            article.url = url
            article.publication_date = date
            article.page = page
            article.issue = issue
        else:
            article = stored.get()

        body = soup.find('span', {'id' : 'contenttext'}).text.strip()
        # A run of two spaces is turned into a line break plus indent.
        article.content = body.replace(u'  ', '\n  ')
        article.save()
Пример #11
0
    def handle(self, *args, **options):
        """Import the published ``<item>`` entries of an XML export as Articles.

        The file named by ``options['filename']`` is parsed; every item whose
        ``wp:status`` is ``publish`` becomes an Article with title, content,
        publish date, author id (looked up in ``AUTHORS``) and
        ``status_id = 2``.  The non-empty ``nicename`` attributes of the
        item's ``<category>`` elements are attached as tags.  Titles of
        entries that fail to save are collected and printed at the end.
        """
        import datetime
        from articles.models import Article, Tag

        def first_text(node, tag_name):
            """Return the text of the first ``tag_name`` element under ``node``."""
            return node.getElementsByTagName(tag_name)[0].firstChild.data

        filename = options.get('filename')
        dom = minidom.parse(filename)

        blog = []  # one dict per published post
        for node in dom.getElementsByTagName('item'):
            if first_text(node, 'wp:status') != "publish":
                continue
            post = dict()
            post["title"] = first_text(node, 'title')
            post["date"] = first_text(node, 'pubDate')
            post["author"] = first_text(node, 'dc:creator')
            post["id"] = first_text(node, 'wp:post_id')

            # An empty body has no text child at all.
            body = node.getElementsByTagName('content:encoded')[0].firstChild
            post["text"] = body.data if body is not None else ""

            # wp:attachment_url could be used to download attachments

            post['author_id'] = AUTHORS[post['author']]
            print(post['author_id'])

            nicenames = [subnode.getAttribute('nicename')
                         for subnode in node.getElementsByTagName('category')]
            categories = [name for name in nicenames if name != '']
            post['tags'] = " ".join(categories)
            post["categories"] = categories
            blog.append(post)

        errors = []
        for entry in blog:
            article = Article()
            article.title = entry['title']
            article.content = entry['text']
            # Fixed-position slices of the pubDate string; presumably of the
            # form "Wed, 02 Oct 2002 ..." -- confirm against the export.
            # The day slice carries a leading space, which int() tolerates.
            dia = entry['date'][4:7]
            mes = MESES[entry['date'][8:11]]
            anio = entry['date'][12:16]
            print(entry['date'])
            print(anio)
            article.publish_date = datetime.datetime(int(anio), mes, int(dia))
            article.author_id = entry['author_id']
            article.status_id = 2
            try:
                article.save()
            except Exception:
                # Best effort: remember the failure and go on with the rest.
                errors.append(entry['title'])
            else:
                new_tags = [Tag.objects.get_or_create(name=tag)[0]
                            for tag in entry['categories']]
                # One add() for all tags instead of re-adding the growing
                # list once per tag.
                if new_tags:
                    article.tags.add(*new_tags)
        print(len(errors))
        print(errors)
Пример #12
0
def publish(f, draft, by=None, publish=None, is_active=True, login_required=False, debug=False):
    """
    Publishes an article.

    The file is parsed into metadata and content; the keyword arguments
    override the parsed metadata.  An existing article with the same slug is
    updated, otherwise a new one is created.

    :param f: The file to parse
    :type f: file
    :param draft: Save as draft?
    :type draft: bool
    :param by: Author username
    :type by: str
    :param publish: When to publish
    :type publish: datetime fmt=YYYY-MM-DD HH:MM
    :param is_active: Article active?
    :type is_active: bool
    :param login_required: Require login?
    :type login_required: bool
    :param debug: Print debug data
    :type debug: bool

    :raises ConfigurationError: if several articles share the slug and no
        publish date was supplied to tell them apart

    :returns: Saved :class:`Article`
    """
    meta, content = parse_meta_and_article(f.read())
    meta.update({
        'is_active': is_active,
        'login_required': login_required,
    })
    if draft:
        meta['status'] = 'Draft'
    if publish:
        meta['publish'] = publish
    if by:
        meta['by'] = by
    slug = slugify(meta['title'])

    # New or updated?
    articles = Article.objects.using(DB).filter(slug=slug)
    match_count = len(articles)
    if match_count > 1:
        if not publish:
            raise ConfigurationError('Title ambiguous; supply publish date')
        # filter() returns a new queryset, so the result has to be kept;
        # discarding it left the disambiguation without any effect.
        articles = articles.filter(publish_date=meta['publish'])
        # NOTE(review): when no article has that publish date this is treated
        # as a new article -- confirm that is the intended behaviour.
        article = articles[0] if articles else Article()
    elif match_count == 1:
        article = articles[0]
    else:
        article = Article()

    article.content = content
    article.markup = 'h'

    # Fields listed in SAVE_NEEDED are deferred until after the first save.
    todo = []
    keys = list(meta.keys())
    # list(...) keeps the concatenation valid where keys() is not a list.
    fallback_keys = REQUIRED_FIELDS + list(FIELD_DEFAULTS.keys())
    for key in keys + [k for k in fallback_keys if k not in keys]:
        value = filter_field(key, meta)
        if key not in SAVE_NEEDED:
            setfield(article, key, value, debug)
        else:
            todo.append((key, value))
    article.save(using=DB)

    for key, value in todo:
        setfield(article, key, value, debug)
    article.save(using=DB)

    return article