# Scraper fragment (Python 2 syntax). The original line breaks and indentation
# were lost, so the whole script is collapsed onto the single line below.
#
# Visible steps, in order:
#   - print the number of entries in `articles`;
#   - for each truthy entry, call `article.find('a')`; when the result has a
#     `title` attribute, create an Article, print the UTF-8 encoded title and
#     store it, print the `href` and store 'http://rfi.fr' + href as the link,
#     print the `data-height` attribute, and, when `data-image` is present,
#     print it and store it as the thumbnail;
#   - query Article rows with the same link; when there are none, set
#     source 'RFI Afrique', view_count 0 and fetched_on to datetime.now(),
#     print the object's __dict__, and save it;
#   - print a separator ('---' * 20) and the 'From my database:' heading, then
#     query every Article and start a loop over the result.
#
# NOTE(review): the nesting of these statements cannot be recovered from this
# line alone -- confirm against the original file before re-indenting.
# NOTE(review): the trailing `for post in posts:` has no body in this view;
# the remainder of the loop appears to be missing.
# NOTE(review): presumably `article.find('a')` can return None (BeautifulSoup
# style API), in which case `a.get('title')` would raise AttributeError, and
# `a['href']` would raise KeyError for an anchor without href -- verify.
print 'Number of article:', len(articles) for article in articles: if article: a = article.find('a') if a.get('title'): post = Article() print a.get('title').encode('utf-8') post.title = a.get('title').encode('utf-8') print 'Link:', a['href'] post.link = 'http://rfi.fr' + a['href'] print a.get('data-height') if a.get('data-image'): print 'Image:', a.get('data-image') post.thumbnail = a.get('data-image') posts = Article.objects.filter(link = post.link) if posts.count()==0: post.source = 'RFI Afrique' post.view_count = 0 post.fetched_on = datetime.now() print post.__dict__ post.save() print '---'*20 print '\n\nFrom my database:' posts = Article.objects.all() for post in posts:
for a in articles[7:]: print '-'*60 if a: post = Article() link = a.find('a', {'class': 'morehltitle2012'}) if link : print 'Link ', link.get('href') post.link = url + link.get('href') desc = a.find('div', {'class': 'morehldesc'}) if desc : print 'Title:\n', desc.get_text().encode('utf-8') post.title = desc.get_text().encode('utf-8') if desc.img: print '\n\nImage', url + desc.img.get('src') post.thumbnail = url + desc.img.get('src') source = a.find('div', {'class': 'morehlsource'}) if source: print source.encode('utf-8') post.source = source.get_text().encode('utf-8') post.view_count = 0 posts = Article.objects.filter(link = post.link) if posts.count()==0: post.fetched_on = datetime.now() post.save() print '\n\nFrom my database:' posts = Article.objects.all() for post in posts: print post.link, post.title.encode('utf-8')