示例#1
0
    def fetch(self):
        """Pull new entries from this blog's feed and store them as posts.

        Does nothing when ``self.from_feed`` is falsy. Otherwise every entry
        of the feed at ``self.rss`` dated after ``self.most_recent_date`` is
        sanitized, summarized and handed to ``self.add_event``. Afterwards
        ``self.most_recent_date`` is advanced to the newest stored post and
        the blog is saved.
        """
        # don't fetch internally hosted blogs
        if not self.from_feed:
            return

        # function-level import, deferred past the guard above so that blogs
        # which are never fetched do not trigger it
        import BlogPost

        events = []

        # parse and iterate the feed
        for post in feedparser.parse(self.rss).entries:
            # entries whose date is missing or unparseable are treated as
            # published right now; the parser is third-party, so stay broad
            # but let KeyboardInterrupt/SystemExit through
            try:
                date = dateutil.parser.parse(post.date).replace(tzinfo=None)
            except Exception:
                date = datetime.datetime.utcnow()

            # don't re-add old posts
            if self.most_recent_date >= date:
                continue

            # prefer the full content, fall back to the description
            try:
                content = post.content[0].value
            except (AttributeError, IndexError, KeyError, TypeError):
                content = post.description

            try:
                author_name = post.author_detail["name"]
            except (AttributeError, KeyError, TypeError):
                author_name = None

            # sanitize the post's content
            content = sanitize(content, [
                "h1", "h2", "h3", "h4", "h5", "h6", "a:href", "p", "ul", "ol",
                "li", "br", "div", "img:src:alt:title", "b", "i", "u",
                "strong", "em", "table", "tbody", "td", "th", "thead", "tfoot",
                "pre", "tt", "code"
            ])

            # format a summary for the post
            summary = sanitize(content, [],
                               strip_tags=[
                                   "h1", "h2", "h3", "h4", "h5", "h6", "p",
                                   "ul", "ol", "li", "br", "div", 'a', "b",
                                   "i", "u", "strong", "em", "pre", "tt",
                                   "code"
                               ])

            # cap the summary at 500 characters before wrapping it
            if len(summary) > 500:
                summary = summary[0:500] + u"..."
            summary = "<p>" + summary + "</p>"

            events.append(
                self.add_event(BlogPost.BlogPost,
                               title=post.title,
                               summary=summary,
                               from_feed=True,
                               author_name=author_name,
                               date=date,
                               extra_args={
                                   "external_link": post.link,
                                   "content": content,
                                   "blog_id": self.id
                               }))

        # find the new most recent date; entries for which add_event returned
        # None are ignored
        dates = [event.date for event in events if event is not None]
        dates.append(self.most_recent_date)
        self.most_recent_date = max(dates)
        self.save()
示例#2
0
 def fetch(self):
   """Import new entries from this blog's feed as blog posts.

   Returns immediately when ``self.from_feed`` is falsy. For every entry of
   the feed at ``self.rss`` that is newer than ``self.most_recent_date``,
   the content is sanitized, a summary of at most 500 characters (plus
   "...") is built, and the result is passed to ``self.add_event``.
   Finally ``self.most_recent_date`` is moved to the newest stored date and
   the blog is saved.
   """
   # don't fetch internally hosted blogs
   if not self.from_feed:
     return

   # function-level import, placed after the guard so the early-return path
   # does not trigger it
   import BlogPost

   events = []

   # parse and iterate the feed
   for post in feedparser.parse(self.rss).entries:
     # a missing or unparseable date falls back to the current UTC time;
     # broad on purpose (third-party parser) but no longer a bare except
     try:
       date = dateutil.parser.parse(post.date).replace(tzinfo=None)
     except Exception:
       date = datetime.datetime.utcnow()

     # don't re-add old posts
     if self.most_recent_date >= date:
       continue

     # use the full content when the entry has it, else the description
     try:
       content = post.content[0].value
     except (AttributeError, IndexError, KeyError, TypeError):
       content = post.description

     try:
       author_name = post.author_detail["name"]
     except (AttributeError, KeyError, TypeError):
       author_name = None

     # sanitize the post's content
     content = sanitize(content, [
       "h1", "h2", "h3", "h4", "h5", "h6",
       "a:href", "p", "ul", "ol", "li", "br", "div",
       "img:src:alt:title",
       "b", "i", "u", "strong", "em",
       "table", "tbody", "td", "th", "thead", "tfoot",
       "pre", "tt", "code"
     ])

     # format a summary for the post
     summary = sanitize(content, [], strip_tags = [
       "h1", "h2", "h3", "h4", "h5", "h6",
       "p", "ul", "ol", "li", "br", "div", 'a',
       "b", "i", "u", "strong", "em",
       "pre", "tt", "code"
     ])

     # truncate long summaries, then wrap in a paragraph
     if len(summary) > 500:
       summary = summary[0:500] + u"..."
     summary = "<p>" + summary + "</p>"

     events.append(self.add_event(BlogPost.BlogPost,
       title = post.title,
       summary = summary,
       from_feed = True,
       author_name = author_name,
       date = date,
       extra_args = {
         "external_link": post.link,
         "content": content,
         "blog_id": self.id
       }
     ))

   # find the new most recent date, skipping entries for which add_event
   # returned None
   dates = [event.date for event in events if event is not None]
   dates.append(self.most_recent_date)
   self.most_recent_date = max(dates)
   self.save()
示例#3
0
  def add_event(self, klass,
                title = None,
                summary = None,
                date = None,
                author_name = None,
                from_feed = None,
                append_unsanitized = "",
                extra_args = None):
    """Create and save a ``klass`` event, unless it is not new.

    Args:
      klass: event model class to instantiate.
      title: title stored on the event.
      summary: HTML summary; run through ``sanitize`` before being stored.
      date: naive datetime of the event. Required in practice even though
        it defaults to None, because ``date.timetuple()`` is called
        unconditionally.
      author_name: name handed to ``find_author`` to look up an author.
      from_feed: stored on the event unchanged.
      append_unsanitized: markup placed in front of the sanitized summary
        without being sanitized itself.
      extra_args: extra keyword arguments for ``klass``; None means none.

    Returns:
      The saved event, or None when the converted ``date`` is not newer
      than ``self.most_recent_date``.
    """
    # a fresh dict per call instead of a shared mutable default
    if extra_args is None:
      extra_args = {}

    # convert to UTC: mktime reads the naive date as local time
    secs = time.mktime(date.timetuple())
    date = datetime.datetime.utcfromtimestamp(secs)

    # don't re-add old events
    if self.most_recent_date >= date:
      return

    # can we find an author for this event?
    from dashboard.models import Blog
    is_personal_blog = self.__class__ is Blog and self.user is not None
    if is_personal_blog:
      author = self.user
      author_email = None
    elif author_name is not None:
      author, author_name, author_email = find_author(author_name)
    else:
      author = None
      author_email = None

    # sanitize the summary
    summary = append_unsanitized + sanitize(summary, [
      "h1", "h2", "h3", "h4", "h5", "h6",
      "a:href", "p", "ul", "ol", "li", "br",
      "b", "i", "u", "strong", "em", "div",
      "pre", "tt", "code"
    ])

    # create and save the event object
    event = klass(author_name = author_name,
                  title = title,
                  summary = summary,
                  from_feed = from_feed,
                  date = date,
                  author_email = author_email,
                  **extra_args)
    if author is not None:
      event.author = author
    event.save()

    # if this is a personal blog, we're all done
    if is_personal_blog:
      # parenthesized single-argument print behaves the same as the
      # Python 2 print statement and also parses on Python 3
      print("Personal blog found by {0}".format(self.user.get_full_name()))
      return event

    # set the project
    event.project = self.project
    event.save()

    # add a contributor for the author if they are not a project author
    if author is not None or author_name is not None:
      from Contributor import Contributor

      if author is not None:
        # associate with the user model
        try:
          cont = Contributor.objects.get(user = author)
        except Contributor.DoesNotExist:
          cont = Contributor(user = author)
      else:
        # if there is no "author", use a contributor associated with a
        # name and an email. this can be upgraded to a User if that person
        # joins observatory. look up by email first, then by name.
        cont = None
        if author_email is not None:
          try:
            cont = Contributor.objects.get(email = author_email)
          except Contributor.DoesNotExist:
            cont = None
        if cont is None:
          try:
            cont = Contributor.objects.get(name = author_name)
          except Contributor.DoesNotExist:
            cont = Contributor(name = author_name, email = author_email)

      # save the contributor and add it to the project
      cont.save()
      cont.projects.add(event.project)

    # print out results
    print("{0} by {1}{2} in {3} at {4}".format(
      klass.__name__,
      author_name,
      " (found)" if author is not None else "",
      self.project.title,
      date
    ))
    return event
示例#4
0
    def add_event(self,
                  klass,
                  title=None,
                  summary=None,
                  date=None,
                  author_name=None,
                  from_feed=None,
                  append_unsanitized="",
                  extra_args=None):
        """Create and save a ``klass`` event, unless it is not new.

        Args:
            klass: event model class to instantiate.
            title: title stored on the event.
            summary: HTML summary; run through ``sanitize`` before storing.
            date: naive datetime of the event. Required in practice even
                though it defaults to None, because ``date.timetuple()`` is
                called unconditionally.
            author_name: name handed to ``find_author`` to look up an author.
            from_feed: stored on the event unchanged.
            append_unsanitized: markup placed in front of the sanitized
                summary without being sanitized itself.
            extra_args: extra keyword arguments for ``klass``; None means
                none.

        Returns:
            The saved event, or None when the converted ``date`` is not
            newer than ``self.most_recent_date``.
        """
        # a fresh dict per call instead of a shared mutable default
        if extra_args is None:
            extra_args = {}

        # convert to UTC: mktime reads the naive date as local time
        secs = time.mktime(date.timetuple())
        date = datetime.datetime.utcfromtimestamp(secs)

        # don't re-add old events
        if self.most_recent_date >= date:
            return

        # can we find an author for this event?
        from dashboard.models import Blog
        is_personal_blog = self.__class__ is Blog and self.user is not None
        if is_personal_blog:
            author = self.user
            author_email = None
        elif author_name is not None:
            author, author_name, author_email = find_author(author_name)
        else:
            author = None
            author_email = None

        # sanitize the summary
        summary = append_unsanitized + sanitize(summary, [
            "h1", "h2", "h3", "h4", "h5", "h6", "a:href", "p", "ul", "ol",
            "li", "br", "b", "i", "u", "strong", "em", "div", "pre", "tt",
            "code"
        ])

        # create and save the event object
        event = klass(author_name=author_name,
                      title=title,
                      summary=summary,
                      from_feed=from_feed,
                      date=date,
                      author_email=author_email,
                      **extra_args)
        if author is not None:
            event.author = author
        event.save()

        # if this is a personal blog, we're all done
        if is_personal_blog:
            # parenthesized single-argument print behaves the same as the
            # Python 2 print statement and also parses on Python 3
            print("Personal blog found by {0}".format(
                self.user.get_full_name()))
            return event

        # set the project
        event.project = self.project
        event.save()

        # add a contributor for the author if they are not a project author
        if author is not None or author_name is not None:
            from Contributor import Contributor

            if author is not None:
                # associate with the user model
                try:
                    cont = Contributor.objects.get(user=author)
                except Contributor.DoesNotExist:
                    cont = Contributor(user=author)
            else:
                # if there is no "author", use a contributor associated with
                # a name and an email. this can be upgraded to a User if that
                # person joins observatory. look up by email first, then by
                # name.
                cont = None
                if author_email is not None:
                    try:
                        cont = Contributor.objects.get(email=author_email)
                    except Contributor.DoesNotExist:
                        cont = None
                if cont is None:
                    try:
                        cont = Contributor.objects.get(name=author_name)
                    except Contributor.DoesNotExist:
                        cont = Contributor(name=author_name,
                                           email=author_email)

            # save the contributor and add it to the project
            cont.save()
            cont.projects.add(event.project)

        # print out results
        print("{0} by {1}{2} in {3} at {4}".format(
            klass.__name__, author_name,
            " (found)" if author is not None else "",
            self.project.title, date))
        return event