def add_blog(request): ''' Adds a new blog to a user's profile. ''' if request.method == 'POST': if request.POST['feed_url']: feed_url = request.POST['feed_url'] # add http:// prefix if missing if feed_url[:4] != "http": feed_url = "http://" + feed_url # pull out human-readable url from feed_url # (naively - later we will crawl blog url for feed url) if re.search('atom.xml/*$', feed_url): url = re.sub('atom.xml/*$', '', feed_url) elif re.search('rss/*$', feed_url): url = re.sub('rss/*$', '', feed_url) else: url = feed_url # janky short circuit if they've already added this url for blog in Blog.objects.filter(user = request.user.id): if url == blog.url: print "FOUND %s which matches %s" % (blog.url, url) return HttpResponseRedirect('/new') # create new blog record in db blog = Blog.objects.create( user=User.objects.get(id=request.user.id), feed_url=feed_url, url=url, created=datetime.datetime.now(), ) blog.save() # Feedergrabber returns ( [(link, title, date)], [errors]) # We're not handling the errors returned for right now crawled, _ = feedergrabber27.feedergrabber(feed_url) # this try/except is a janky bugfix. This should be done with celery try: for post in crawled: post_url, post_title, post_date = post newpost = Post.objects.create( blog=Blog.objects.get(user=request.user.id), url=post_url, title=post_title, content="", date_updated=post_date, ) except: pass return HttpResponseRedirect('/new') else: return HttpResponse("I didn't get your feed URL. Please go back and try again.") else: return render_to_response('home/add_blog.html', {}, context_instance=RequestContext(request))
def add_blog(request):
    ''' Adds a blog to a user's profile as part of the create_account process.

    GET renders the add-blog form; POST reads ``feed_url``, derives a
    human-readable blog url, records the blog, crawls its posts, and
    redirects to /new.
    '''
    if request.method == 'POST':
        if request.POST['feed_url']:
            feed_url = request.POST['feed_url']
            # add http:// prefix if missing
            if feed_url[:4] != "http":
                feed_url = "http://" + feed_url
            # pull out human-readable url from feed_url
            # (naively - later we will crawl blog url for feed url)
            if re.search('atom.xml/*$', feed_url):
                url = re.sub('atom.xml/*$', '', feed_url)
            elif re.search('rss/*$', feed_url):
                url = re.sub('rss/*$', '', feed_url)
            else:
                url = feed_url
            # create new blog record in db; objects.create() already
            # saves, so no extra save() call is needed
            blog = Blog.objects.create(
                user=User.objects.get(id=request.user.id),
                feed_url=feed_url,
                url=url,
                created=datetime.datetime.now(),
            )
            # Feedergrabber returns ( [(link, title, date)], [errors])
            # We're not handling the errors returned for right now
            crawled, _ = feedergrabber27.feedergrabber(feed_url)
            for post_url, post_title, post_date in crawled:
                Post.objects.create(
                    # reuse the blog created above: re-querying with
                    # .get(user=...) raises MultipleObjectsReturned as
                    # soon as the user has more than one blog
                    blog=blog,
                    url=post_url,
                    title=post_title,
                    content="",
                    date_updated=post_date,
                )
            return HttpResponseRedirect('/new')
        else:
            return HttpResponse(
                "I didn't get your feed URL. Please go back and try again.")
    else:
        return render_to_response('home/add_blog.html', {},
                                  context_instance=RequestContext(request))
def add_blog(request):
    ''' Adds a blog to a user's profile as part of the create_account process.

    GET renders the add-blog form; POST reads ``feed_url``, derives a
    human-readable blog url, records the blog, crawls its posts, and
    redirects to /new.
    '''
    if request.method == 'POST':
        if request.POST['feed_url']:
            feed_url = request.POST['feed_url']
            # add http:// prefix if missing
            if feed_url[:4] != "http":
                feed_url = "http://" + feed_url
            # pull out human-readable url from feed_url
            # (naively - later we will crawl blog url for feed url)
            if re.search('atom.xml/*$', feed_url):
                url = re.sub('atom.xml/*$', '', feed_url)
            elif re.search('rss/*$', feed_url):
                url = re.sub('rss/*$', '', feed_url)
            else:
                url = feed_url
            # create new blog record in db; objects.create() already
            # saves, so no extra save() call is needed
            blog = Blog.objects.create(
                user=User.objects.get(id=request.user.id),
                feed_url=feed_url,
                url=url,
                created=datetime.datetime.now(),
            )
            # Feedergrabber returns ( [(link, title, date)], [errors])
            # We're not handling the errors returned for right now
            crawled, _ = feedergrabber27.feedergrabber(feed_url)
            for post_url, post_title, post_date in crawled:
                Post.objects.create(
                    # reuse the blog created above: re-querying with
                    # .get(user=...) raises MultipleObjectsReturned as
                    # soon as the user has more than one blog
                    blog=blog,
                    url=post_url,
                    title=post_title,
                    content="",
                    date_updated=post_date,
                )
            return HttpResponseRedirect('/new')
        else:
            return HttpResponse("I didn't get your feed URL. Please go back and try again.")
    else:
        return render_to_response('home/add_blog.html', {},
                                  context_instance=RequestContext(request))
def try_url(url):
    """Smoke-test a single feed url through feedergrabber.

    Prints any crawl errors; exits the process on an empty (but
    error-free) result; prints a progress dot on success. Re-raises
    any exception feedergrabber itself throws.
    """
    try:
        result = feedergrabber27.feedergrabber(url)
    except Exception as e:
        print(url, e)
        # bare raise preserves the original traceback ("raise e" would
        # discard it on Python 2)
        raise
    if result[1]:
        print(result[1], '\n')
    elif not result[0]:
        print('\nEmpty result on {0} with error report:\n{1}.\n'.
              format(url, result[1]))
        sys.exit(0)
    else:
        print('.', end='')
        sys.stdout.flush()
def add_blog(request): ''' Adds a new blog to a user's profile. ''' if request.method == 'POST': feed_url = request.POST.get('feed_url', None) if feed_url: # add http:// prefix if missing if feed_url[:4] != "http": feed_url = "http://" + feed_url # pull out human-readable url from feed_url # (naively - later we will crawl blog url for feed url) if re.search('atom.xml/*$', feed_url): url = re.sub('atom.xml/*$', '', feed_url) elif re.search('rss/*$', feed_url): url = re.sub('rss/*$', '', feed_url) else: url = feed_url # janky short circuit if they've already added this url for blog in Blog.objects.filter(user=request.user.id): if url == blog.url: print "FOUND %s which matches %s" % (blog.url, url) return HttpResponseRedirect(reverse('new')) # Feedergrabber returns ( [(link, title, date)], [errors]) # We're not handling the errors returned for right now # Returns None if there was an exception when parsing the content. crawled, errors = feedergrabber27.feedergrabber( feed_url, suggest_feed_url=True) if crawled is None: message = ( "This url does not seem to contain valid atom/rss feed xml. " "Please use your blog's feed url! ") if errors and len(errors) == 1 and isinstance( errors[0], dict) and 'feed_url' in errors[0]: feed_url = errors[0]['feed_url'] if feed_url is not None: message += 'It may be this -- {}'.format(feed_url) messages.error(request, message) return HttpResponseRedirect(reverse('add_blog')) # create new blog record in db blog = Blog.objects.create( user=User.objects.get(id=request.user.id), feed_url=feed_url, url=url, ) # FIXME: this try/except is a janky bugfix. Use celery instead? 
# FIXME: very similar to code in crawlposts.get_or_create_post try: for post_url, post_title, post_date, post_content in crawled: post_date = timezone.make_aware( post_date, timezone.get_default_timezone()) Post.objects.create( blog=blog, url=post_url, title=post_title, content=post_content, date_posted_or_crawled=post_date, ) except Exception as e: print e return HttpResponseRedirect(reverse('new')) else: messages.error(request, "No feed URL provided.") return HttpResponseRedirect(reverse('add_blog')) else: return render(request, 'home/add_blog.html')
def add_blog(request): ''' Adds a new blog to a user's profile. ''' if request.method == 'POST': if request.POST['feed_url']: feed_url = request.POST['feed_url'] # add http:// prefix if missing if feed_url[:4] != "http": feed_url = "http://" + feed_url # pull out human-readable url from feed_url # (naively - later we will crawl blog url for feed url) if re.search('atom.xml/*$', feed_url): url = re.sub('atom.xml/*$', '', feed_url) elif re.search('rss/*$', feed_url): url = re.sub('rss/*$', '', feed_url) else: url = feed_url # janky short circuit if they've already added this url for blog in Blog.objects.filter(user=request.user.id): if url == blog.url: print "FOUND %s which matches %s" % (blog.url, url) return HttpResponseRedirect('/new') # create new blog record in db blog = Blog.objects.create( user=User.objects.get(id=request.user.id), feed_url=feed_url, url=url, created=datetime.datetime.now(), ) blog.save() # Feedergrabber returns ( [(link, title, date)], [errors]) # We're not handling the errors returned for right now crawled, _ = feedergrabber27.feedergrabber(feed_url) # this try/except is a janky bugfix. This should be done with celery try: for post in crawled: post_url, post_title, post_date = post newpost = Post.objects.create( blog=Blog.objects.get(user=request.user.id), url=post_url, title=post_title, content="", date_updated=post_date, ) except: pass return HttpResponseRedirect('/new') else: return HttpResponse( "I didn't get your feed URL. Please go back and try again.") else: return render_to_response('home/add_blog.html', {}, context_instance=RequestContext(request))
def add_blog(request): ''' Adds a new blog to a user's profile. ''' if request.method == 'POST': feed_url = request.POST.get('feed_url', None) if feed_url: # add http:// prefix if missing if feed_url[:4] != "http": feed_url = "http://" + feed_url # pull out human-readable url from feed_url # (naively - later we will crawl blog url for feed url) if re.search('atom.xml/*$', feed_url): url = re.sub('atom.xml/*$', '', feed_url) elif re.search('rss/*$', feed_url): url = re.sub('rss/*$', '', feed_url) else: url = feed_url # janky short circuit if they've already added this url for blog in Blog.objects.filter(user=request.user.id): if url == blog.url: print "FOUND %s which matches %s" % (blog.url, url) return HttpResponseRedirect(reverse('new')) # Feedergrabber returns ( [(link, title, date)], [errors]) # We're not handling the errors returned for right now # Returns None if there was an exception when parsing the content. crawled, errors = feedergrabber27.feedergrabber(feed_url, suggest_feed_url=True) if crawled is None: message = ( "This url does not seem to contain valid atom/rss feed xml. " "Please use your blog's feed url! " ) if errors and len(errors) == 1 and isinstance(errors[0], dict) and 'feed_url' in errors[0]: feed_url = errors[0]['feed_url'] if feed_url is not None: message += 'It may be this -- {}'.format(feed_url) messages.error(request, message) return HttpResponseRedirect(reverse('add_blog')) # create new blog record in db blog = Blog.objects.create( user=User.objects.get(id=request.user.id), feed_url=feed_url, url=url, ) # FIXME: this try/except is a janky bugfix. Use celery instead? 
# FIXME: very similar to code in crawlposts.get_or_create_post try: for post_url, post_title, post_date, post_content in crawled: post_date = timezone.make_aware(post_date, timezone.get_default_timezone()) Post.objects.create( blog=blog, url=post_url, title=post_title, content=post_content, date_posted_or_crawled=post_date, ) except Exception as e: print e return HttpResponseRedirect(reverse('new')) else: messages.error(request, "No feed URL provided.") return HttpResponseRedirect(reverse('add_blog')) else: return render(request, 'home/add_blog.html')