def _write_to_db(self, item):
    # Reuse the existing News row with this title, or start a fresh one.
    try:
        post = News.objects.get(title=item["title"])
    except News.DoesNotExist:
        post = News(title=item["title"])
    post.body = item["description"]
    post.order = item["order"]
    post.link = item["link"]
    post.date = item["date"]
    post.save()
    return post.pk
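
# The same upsert can be written more compactly, as a sketch; it assumes the
# News model fields used above and Django's stock update_or_create API
# (the method name _write_to_db_compact is hypothetical):
def _write_to_db_compact(self, item):
    post, _created = News.objects.update_or_create(
        title=item["title"],
        defaults={
            "body": item["description"],
            "order": item["order"],
            "link": item["link"],
            "date": item["date"],
        },
    )
    return post.pk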
def handle(self, *arguments, **options):
    print('START')
    i = 1
    # `soup` is expected to be a BeautifulSoup document built outside this snippet.
    block = soup.find_all('div', {'class': 'item-container'})
    for l in block:
        record = News()
        record.title = 'Title %s' % i
        # Concatenate every paragraph of the issue body into one string.
        find_text = l.find_next('div', {'class': 'issue-item'}).find_all('p')
        a = ''
        for m in find_text:
            a += m.text
        record.content = a
        record.urls = l.find('a', {'class': 'issue-item-title'}).get('href', 'http')
        record.save()
        print('Saving %s' % i)
        i += 1
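
# A minimal sketch of how the `soup` used above could be produced; the URL is
# hypothetical, the rest is the standard requests + BeautifulSoup pattern:
import requests
from bs4 import BeautifulSoup

response = requests.get('https://example.com/digest/')
soup = BeautifulSoup(response.text, 'html.parser')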
def process_item(self, item, spider):
    # Route each scraped item to the matching model by its `type` field.
    if item.get('type') == "news":
        news = News(
            headline=item.get('headline'),
            body=item.get('body'),
            url=item.get('url'),
            byline=item.get('byLine'),
            section=item.get('section'),
            picture=item.get('picture'),
        )
        news.save()
    elif item.get('type') == "tweet":
        tweet = Tweet(
            tweet=item.get('tweet'),
            time=item.get('time'),
            user=item.get('user'),
            user_name=item.get('user_name'),
            link=item.get('link'),
            user_picture=item.get('user_picture'),
        )
        tweet.save()
    return item
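
# For Scrapy to call process_item, the pipeline must be registered in the
# project's settings.py; a sketch, with the dotted path and class name as
# hypothetical placeholders:
ITEM_PIPELINES = {
    'myproject.pipelines.SaveToDbPipeline': 300,
}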
def each_article(self, category, url_list):
    main = Category(name=category)
    main.save()
    for link in url_list:
        soup = self.soup(link)
        div = soup.find("div", {"class": "bdaia-post-content"})
        p_list = div.find_all("p")
        # Join every paragraph of the post body into one article string.
        description = [p.get_text() for p in p_list]
        self.article.append(''.join(description))
    full = zip(self.title_list, self.href_list, self.description, self.article)
    for title, href, short_descr, article in full:
        item = News(
            item_title=title,
            item_link=href,
            item_short_descr=short_descr,
            article=article,
            category=main,
        )
        item.save()
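
# each_article relies on a self.soup(link) helper; a minimal sketch of one,
# assuming a requests + BeautifulSoup stack (only the method name comes from
# the call above, the class name and body are assumptions):
import requests
from bs4 import BeautifulSoup

class NewsScraper:  # hypothetical host class
    def soup(self, url):
        response = requests.get(url)
        return BeautifulSoup(response.text, 'html.parser')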
def news_add(request, **kwargs):
    news = News(**kwargs)
    news.save()
    return {'news': news.tojson()}
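
# Usage sketch, assuming news_add is wrapped by a plain Django view and that
# News.tojson() returns a JSON-serializable dict (the view name and field
# values are hypothetical):
from django.http import JsonResponse

def news_add_view(request):
    result = news_add(request, title='Example title', body='Example body')
    return JsonResponse(result)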
def handle(self, *args, **options):
    # ...
    print('Start')

    # One-off cleanup helpers, kept disabled:
    # News.objects.all().delete()
    # for r in News.objects.filter(pk__gt=10):
    #     print(r.delete())
    # return True  # early exit left over from debugging; disabled so the loop runs

    # Seed the table with placeholder records.
    for i in range(1, 200):
        record = News()
        record.title = 'Title %s' % i
        record.content = 'Content %s' % i
        record.save()
        print('Saving %s' % i)

    if options['url']:
        print('Load from %s' % options['url'])
        # ...

    print('End')

import requests
from bs4 import BeautifulSoup

url = 'https://pythondigest.ru/'
r = requests.get(url)
encoded_page = r.text.encode('utf-8')
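
# A sketch of the next parsing step for the fetched page, reusing the
# 'item-container' / 'issue-item-title' selectors from the handle() snippet
# earlier (whether pythondigest.ru serves that exact markup is an assumption):
soup = BeautifulSoup(r.text, 'html.parser')
for container in soup.find_all('div', {'class': 'item-container'}):
    title_link = container.find('a', {'class': 'issue-item-title'})
    if title_link is not None:
        print(title_link.get_text(strip=True), title_link.get('href', ''))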