class ArticleAppPipeline(object):
    """Item pipeline that copies scraped article data onto an ``Article``.

    The pipeline is built through :meth:`from_crawler`, which reads the
    ``publisher`` entry from the crawler settings.
    """

    def __init__(self, publisher, *args, **kwargs):
        """Remember the publisher; extra arguments are accepted and ignored."""
        self.publisher = publisher

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from ``crawler.settings``."""
        # The 'publisher' setting is passed in from the Django view.
        publisher = crawler.settings.get('publisher')
        return cls(publisher=publisher)

    def close_spider(self, spider):
        """Nothing to clean up when the spider closes."""
        return None

    def process_item(self, article, spider):
        """Save the data under ``article['article']`` and return the item.

        The journal is looked up by the ISSN found in the item, so
        ``Journal.objects.get`` must find exactly one match for the save
        to go ahead.
        """
        self.article = Article()
        record = self.article
        data = article['article']

        # Resolve c_journal_id from the journal that already has this ISSN.
        record.c_journal_id = Journal.objects.get(issn=data['issn'])

        record.title = data['title']
        record.abstract = data['abstract']
        record.doi = data['doi']
        record.keyword = data['keyword']
        record.publication_date = data['publication_date']
        record.uri = data['uri']
        record.pdf_uri = data['pdf_uri']
        record.save()
        return article
def handle(self, *args, **options):
    """Fetch and store the HTML source of articles that do not have one.

    Walks the records returned by ``Article().getAll()``. For every record
    whose ``htmlSource`` compares equal to ``None``, the page at ``url`` is
    downloaded, encoded as UTF-8 and saved to ``htmlSource`` under the name
    ``<title>.html``. A line is printed for each article that was updated.
    """
    articles = Article().getAll()
    for article in articles:
        # NOTE(review): the equality test (not `is None`) is kept on
        # purpose; if htmlSource is a file field wrapper, `==` may be what
        # detects an unset file. Confirm before changing it.
        if article.htmlSource == None:  # noqa: E711
            page = requests.get(article.url)
            html_bytes = page.text.encode('utf-8')
            # Wrap the downloaded bytes so they can be handed to save().
            source_file = ContentFile(html_bytes)
            source_file.open()
            article.htmlSource.save(name='%s.html' % article.title,
                                    content=source_file)
            # Single-argument call form: prints the same text on Python 2
            # and Python 3 (the bare print statement is a SyntaxError on 3).
            print('added source code to article %s' % article.url)
def setUp(self):
    """Create four users and three articles with staggered timestamps.

    Users join 0, 2, 6 and 6 days before "now" (UTC); all three articles
    get a ``created`` value of three days before "now".
    """
    super(TestMetrics, self).setUp()
    now = datetime.utcnow().replace(tzinfo=timezone.utc)
    one_day = timedelta(days=1)

    for days_ago in (0, 2, 6, 6):
        User.objects.create(username='******',
                            date_joined=now - one_day * days_ago)

    Article.objects.bulk_create([Article() for _ in range(3)])
    # bulk_create cannot set the timestamp per row here, so patch it after.
    Article.objects.all().update(created=now - one_day * 3)
def get_data():
    """Scrape the Hacker News front page and save each story link.

    Every ``<a class="storylink">`` element found on the page is stored as
    an ``Article`` with its text as ``title`` and its ``href`` as ``url``.
    A link whose save raises ``IntegrityError`` is skipped; the remaining
    links are still processed.
    """
    from requests import get
    from main.models import Article
    from bs4 import BeautifulSoup
    from django.db.utils import IntegrityError

    page = get('https://news.ycombinator.com/')
    soup = BeautifulSoup(page.text, features='html.parser')

    for link in soup.find_all('a', class_='storylink'):
        # Guard each save on its own: with the try around the whole loop,
        # the first rejected link silently dropped every link after it.
        try:
            Article(title=link.string, url=link.get('href')).save()
        except IntegrityError:
            # Presumably a link that is already stored; verify against the
            # model's constraints.
            continue
def process_item(self, article, spider):
    """Copy the scraped fields onto a new ``Article``, save it, return the item.

    The data is read from ``article['article']``. The journal is resolved
    through ``Journal.objects.get`` using the item's ISSN.
    """
    self.article = Article()
    scraped = article['article']

    # Resolve c_journal_id from the journal that already has this ISSN.
    self.article.c_journal_id = Journal.objects.get(issn=scraped['issn'])

    plain_fields = (
        'title',
        'abstract',
        'doi',
        'keyword',
        'publication_date',
        'uri',
        'pdf_uri',
    )
    for field_name in plain_fields:
        setattr(self.article, field_name, scraped[field_name])

    self.article.save()
    return article
def new_article():
    """Show the new-article form, or store the article when it is POSTed.

    On POST the article is built from the ``title`` and ``editordata`` form
    fields with the current user as author, committed, and the client is
    redirected to the dashboard. Any other method renders the form.
    """
    if request.method != 'POST':
        return render_template('new_article.html',
                               title='New Article',
                               legend='New Article')

    post = Article(
        title=request.form.get('title'),
        content=request.form.get('editordata'),
        author=current_user,
    )
    db.session.add(post)
    db.session.commit()
    flash('Your article has been posted!', 'success')
    return redirect(url_for('dashboard'))
def add_article(request):
    """Create an article from POST data, or render the add-article form.

    When the user is authenticated and the request carries POST data, an
    ``Article`` is saved with the user as author and the ``article_title``
    and ``article_body`` fields as title and content. The client is then
    sent back to the referring page, or to this view's own path when no
    ``Referer`` header was sent. In every other case the form template is
    rendered.

    Raises:
        KeyError: if ``article_title`` or ``article_body`` is missing from
            the POST data.
    """
    if request.user.is_authenticated and request.POST:
        article = Article()
        article.author = request.user
        article.title = request.POST["article_title"]
        article.content = request.POST["article_body"]
        article.save()
        # The Referer header is optional; indexing META directly raised a
        # KeyError (after the article was already saved) when it was absent.
        return redirect(request.META.get('HTTP_REFERER') or request.path)

    return render(request, 'main/add-article.html')
def process_item(self, item, spider):
    """Store a scraped article unless its link is already in the database.

    If no ``Article`` with the item's ``article_link`` exists, the author
    is looked up by ``author_short`` (and created from
    ``article_author_id`` / ``author_short`` when missing), the article is
    saved with that author, and ``self.connection`` is committed. The item
    is returned unchanged in every case.
    """
    link = item['article_link']

    # Ask the database whether the link exists instead of loading every
    # stored link (and every author) into a Python list for each item.
    if not Article.objects.filter(article_link=link).exists():
        # The author is saved before it is attached to the article, so the
        # article always references a row that already exists. An author
        # that is already stored is reused as-is, not written back.
        author, _created = Author.objects.get_or_create(
            author_short=item['author_short'],
            defaults={'author_fullname': item['article_author_id']},
        )

        article = Article()
        article.article_link = link
        article.article_content = item['article_content']
        article.article_author = author
        article.save()
        self.connection.commit()

    return item
def addNewsResualt(request):
    """Save a news article from the query string and render the news list.

    Reads ``cat``, ``title``, ``tag``, ``news``, ``fromdate`` and ``todate``
    from ``request.GET``, stores them on a new ``Article`` (teacher id and
    valid flag are fixed to 1, date is the current local time), then renders
    ``xinwendongtai.html`` with all articles as ``posts``.
    """
    news = Article()
    query = request.GET

    news.cat = query['cat']
    news.teacher_id = 1
    news.title = query['title']
    news.tag = query['tag']
    news.text = query['news']
    news.activity_start_date = query['fromdate']
    news.activity_end_date = query['todate']
    news.valid = 1
    news.date = datetime.datetime.now()
    news.save()

    all_posts = type(news).objects.all()
    template = loader.get_template('xinwendongtai.html')
    context = Context({'posts': all_posts})
    return HttpResponse(template.render(context))
def get(self, request, format=None):
    """Return all articles, serialized, with an HTTP 200 status.

    The articles come from ``Article.get_all_articles()`` and are
    serialized with ``ArticleSerializer(many=True)``.
    """
    # A `permission_classes` name assigned inside a handler is only a local
    # variable and does not change the permission checks. If this endpoint
    # must be public, declare `permission_classes = [permissions.AllowAny]`
    # on the view class instead.
    articles = Article.get_all_articles()
    serializer = ArticleSerializer(articles, many=True)
    return response.Response(serializer.data, status=status.HTTP_200_OK)
def get(self, request, id, format=None):
    """Return the article with the given id, serialized, with HTTP 200."""
    # NOTE(review): the ratings result is not used by the response; the call
    # is kept because its side effects, if any, are not visible from here.
    rated_article = Article.get_article(id)
    Rating.get_article_ratings(rated_article)

    article = Article.get_article(id)
    payload = ArticleSerializer(article, many=False).data
    return response.Response(payload, status=status.HTTP_200_OK)
def addNewsResualt(request):
    """Create an article from GET parameters, then render every article.

    Each of the query parameters ``cat``, ``title``, ``tag``, ``news``,
    ``fromdate`` and ``todate`` is copied onto a new ``Article``; the
    teacher id and valid flag are set to 1 and the date to the current
    local time. The response is ``xinwendongtai.html`` rendered with all
    articles under the ``posts`` key.
    """
    entry = Article()

    # (model attribute, query-string key) pairs, read in this order.
    copied = (
        ('cat', 'cat'),
        ('title', 'title'),
        ('tag', 'tag'),
        ('text', 'news'),
        ('activity_start_date', 'fromdate'),
        ('activity_end_date', 'todate'),
    )
    for attribute, key in copied:
        setattr(entry, attribute, request.GET[key])

    entry.teacher_id = 1
    entry.valid = 1
    entry.date = datetime.datetime.now()
    entry.save()

    posts = entry.__class__.objects.all()
    page = loader.get_template('xinwendongtai.html')
    rendered = page.render(Context({'posts': posts}))
    return HttpResponse(rendered)