Пример #1
0
def generate_reference_user_status(user,references):
  """Build one ReferenceUserStatus per reference for the given user.

  Each status gets ``user`` as its owner, the reference and its
  publication date, and a main source chosen among the reference's
  sources that belong to the user's profile (ordered by "pub_date").
  When there is no such source, the placeholder Reference with url
  "<unknown>" is used instead, and created on the fly if missing.

  WARNING: the returned status instances are NOT saved in the
  database (only the "<unknown>" placeholder Reference may be saved).

  Return the list of new statuses, in the order of ``references``.
  """
  statuses = []
  for reference in references:
    status = ReferenceUserStatus()
    status.owner = user
    status.reference = reference
    status.reference_pub_date = reference.pub_date
    user_sources = reference.sources\
                            .filter(userprofile=user.userprofile)\
                            .distinct().order_by("pub_date")
    try:
      main_source = user_sources.get()
    except MultipleObjectsReturned:
      # Several candidates: keep the first one in "pub_date" order.
      main_source = user_sources.all()[0]
    except ObjectDoesNotExist:
      # No source known to this user: fall back on the shared placeholder.
      try:
        main_source = Reference.objects.get(url="<unknown>")
      except ObjectDoesNotExist:
        main_source = Reference(
          url="<unknown>",
          title="<unknown>",
          save_count=1,
          pub_date=datetime.utcfromtimestamp(0).replace(tzinfo=timezone.utc))
        main_source.save()
    status.main_source = main_source
    statuses.append(status)
  return statuses
Пример #2
0
def create_reference_from_feedparser_entry(entry, date, previous_ref):
    """Create or refresh a reference from a FeedParser entry.

    The reference is given the publication date passed as ``date``.

    If the corresponding Reference already exists, it must be given as
    the ``previous_ref`` argument; if ``previous_ref`` is None, it is
    assumed that there is no matching Reference in the db and a new
    (unsaved) one is built. The url and title are only set for new
    references.

    Note: Enforce Dave Winer's recommendation for linkblog:
    http://scripting.com/2014/04/07/howToDisplayTitlelessFeedItems.html
    with a little twist: if a feed item has no title we will use the
    (possibly truncated) description as a title and if there is no
    description the link will be used. In any case the description of a
    reference is set even if this description is also used for the
    title.

    When the url had to be truncated, a notice holding the full url is
    prepended to the description, separated from it by a single space.

    Return a tuple with the unsaved reference and a set of tag names.
    """
    url = entry.link
    info = ""
    # A missing, empty or None description is uniformly handled as "",
    # so that None is never handed to strip_tags() or str.join().
    description = entry.get("description") or ""
    tags = set()
    if entry.get("tags", None):
        tags = set(t.term for t in entry.tags)
    if previous_ref is None:
        url_truncated, did_truncate = sanitize_url(url)
        if did_truncate:
            # Save the full url in info to limit the loss of information
            info = u"<WOM had to truncate the following URL: %s>" % url
            logger.warning("Found an url of length %d (>%d) "
                           "when importing references from feed.",
                           len(url), URL_MAX_LENGTH)
        url = url_truncated
        # set the title only for new ref (should avoid weird behaviour
        # from the user point of view)
        title = truncate_reference_title(
            HTMLUnescape(entry.get("title")
                         or strip_tags(description)
                         or url))
        ref = Reference(url=url, title=title)
    else:
        ref = previous_ref
    # Join only the non-empty parts: no stray leading space when there
    # is no truncation notice.
    ref.description = " ".join(part for part in (info, description) if part)
    ref.pub_date = date
    return (ref, tags)
Пример #3
0
 def save(self):
     """Subscribe the form's user to the web feed described by the form.

     Warning: the feed will be saved as well as the related objects
     (no commit options).

     If the user's profile already has a web feed whose source has the
     submitted url, nothing is done and None is returned. Otherwise an
     existing WebFeed with that source url is reused, or a new one is
     created (together with its source Reference when missing), and the
     feed and its source are attached to the user's profile.

     Returns the WebFeed the user has been subscribed to, or None when
     the user was already subscribed.
     """
     form_url, _ = sanitize_url(self.cleaned_data["url"])
     form_title = self.cleaned_data["title"]
     form_feed_url, _ = sanitize_url(self.cleaned_data["feed_url"])
     profile = self.user.userprofile
     if profile.web_feeds.filter(source__url=form_url).exists():
         # nothing to do
         return
     # try a bigger look-up anyway
     same_sources = WebFeed.objects.filter(source__url=form_url).all()
     # url are unique for sources
     if same_sources:
         new_feed = same_sources[0]
         # The source of the reused feed is what gets attached to the
         # profile below (it used to be left undefined on this path).
         source_ref = new_feed.source
     else:
         if form_title:
             source_title = form_title
         else:
             source_title = build_reference_title_from_url(form_url)
         try:
             source_ref = Reference.objects.get(url=form_url)
         except ObjectDoesNotExist:
             source_ref = Reference(url=form_url,
                                    title=source_title,
                                    pub_date=datetime.now(timezone.utc))
             source_ref.save()
         new_feed = WebFeed(source=source_ref)
         # assume that either form_feed_url or form_url have been
         # validated as a valid feed url
         new_feed.xmlURL = form_feed_url or form_url
         # Epoch as last check date, i.e. the feed was never checked.
         new_feed.last_update_check = datetime.utcfromtimestamp(0)\
                                              .replace(tzinfo=timezone.utc)
         new_feed.save()
     profile.sources.add(source_ref)
     profile.public_sources.add(source_ref)
     profile.web_feeds.add(new_feed)
     return new_feed
Пример #4
0
def import_feedsources_from_opml(opml_txt):
    """Save in the db the web feeds found in the OPML-formatted text.

    opml_txt: a unicode string representing the content of a full OPML file.

    For each collected feed, the WebFeed with the same xmlURL is reused
    when it exists; otherwise a new WebFeed is created, using as source
    the Reference matching the feed's htmlUrl (or its xmlUrl when there
    is no htmlUrl), which is created and saved if missing. New feeds are
    saved in a single transaction, and the save_count of the source
    Reference of each new feed is incremented.

    Return a dictionary associating each feed with a set of tags
    {feed: tagSet, ...}.
    """
    collected_feeds, _ = parse_opml(opml_txt, False)
    db_new_feedsources = []
    feeds_and_tags = []
    newly_referenced_source = []
    for current_feed in collected_feeds:
        try:
            feed_source = WebFeed.objects.get(xmlURL=current_feed.xmlUrl)
        except MultipleObjectsReturned:
            # Several feeds share this xmlURL: pick one of *them* (and
            # not an arbitrary feed from the whole table).
            feed_source = WebFeed.objects\
                                 .filter(xmlURL=current_feed.xmlUrl)[0]
        except ObjectDoesNotExist:
            url_id = current_feed.htmlUrl or current_feed.xmlUrl
            try:
                ref = Reference.objects.get(url=url_id)
            except ObjectDoesNotExist:
                ref = Reference(url=url_id,
                                title=HTMLUnescape(current_feed.title),
                                pub_date=datetime.now(timezone.utc))
                ref.save()
            feed_source = WebFeed(source=ref, xmlURL=current_feed.xmlUrl)
            # Epoch as last check date, i.e. the feed was never checked.
            feed_source.last_update_check = datetime.utcfromtimestamp(0)\
                                                    .replace(tzinfo=timezone.utc)
            newly_referenced_source.append(ref)
            db_new_feedsources.append(feed_source)
        feeds_and_tags.append((feed_source, current_feed.tags))
    with transaction.commit_on_success():
        for f in db_new_feedsources:
            f.save()
        # make sure to record the fact that the references are now
        # used as the source of a feed
        for r in newly_referenced_source:
            r.save_count += 1
            r.save()
    # The dict is only built once the new feeds have been saved.
    return dict(feeds_and_tags)
Пример #5
0
 def save(self):
     """Save the bookmark described by the form for the form's user.

     Warning: the bookmark will be saved as well as the related objects
     (no commit options).

     The bookmarked Reference and its source Reference are looked up by
     url and created when missing. An existing reference that has no
     source yet is linked to the source derived from the form instead of
     being mistaken for a missing reference. The source is added to the
     user's profile sources and the user's ReferenceUserStatus instances
     for that reference are flagged as saved.

     Returns the bookmark.
     """
     url, _ = sanitize_url(self.cleaned_data["url"])
     title = self.cleaned_data["title"] \
             or build_reference_title_from_url(url)
     comment = self.cleaned_data["comment"]
     pub_date = self.cleaned_data["pub_date"] \
                or datetime.now(timezone.utc)
     src_url, _ = sanitize_url(self.cleaned_data["source_url"] \
                               or build_source_url_from_reference_url(url))
     src_title = self.cleaned_data["source_title"] \
                 or build_reference_title_from_url(src_url)
     # Find a matching reference. Only this look-up is guarded, so that
     # a failure to find a *source* is never read as a missing reference.
     try:
         bookmarked_ref = Reference.objects.get(url=url)
     except ObjectDoesNotExist:
         bookmarked_ref = None
     ref_src = None
     if bookmarked_ref is not None:
         # Arbitrarily chose one of the possible sources
         known_sources = list(bookmarked_ref.sources.all()[:1])
         if known_sources:
             ref_src = known_sources[0]
     if ref_src is None:
         # Find or create the source given by (or derived from) the form
         try:
             ref_src = Reference.objects.get(url=src_url)
         except ObjectDoesNotExist:
             ref_src = Reference(url=src_url,
                                 title=src_title,
                                 pub_date=pub_date)
             ref_src.save()
         if bookmarked_ref is None:
             if src_url == url:
                 # The reference is its own source
                 bookmarked_ref = ref_src
             else:
                 bookmarked_ref = Reference(url=url,
                                            title=title,
                                            pub_date=pub_date)
                 bookmarked_ref.save()
                 bookmarked_ref.sources.add(ref_src)
         elif src_url != url:
             # Existing reference without any source: link it to the
             # source, as done for a newly created reference.
             bookmarked_ref.sources.add(ref_src)
     with transaction.commit_on_success():
         try:
             bmk = UserBookmark.objects.get(owner=self.user,
                                            reference=bookmarked_ref)
         except ObjectDoesNotExist:
             bmk = UserBookmark(owner=self.user,
                                reference=bookmarked_ref,
                                saved_date=datetime.now(timezone.utc))
             bookmarked_ref.save_count += 1
             bmk.save()
             bookmarked_ref.save()
         # allow the user-specific comment to be changed and also prefix
         # it with the user specified title if it differs from the
         # existing reference title.
         if comment:
             new_comment = comment
         else:
             new_comment = bmk.comment
         if self.cleaned_data["title"] and title != bookmarked_ref.title \
            and not new_comment.startswith(title):
             new_comment = "%s: %s" % (title, new_comment)
         if new_comment != bmk.comment:
             bmk.comment = new_comment
             bmk.save()
     with transaction.commit_on_success():
         user_sources = self.user.userprofile.sources
         # Membership test done by the database instead of loading
         # every source of the profile.
         if not user_sources.filter(pk=ref_src.pk).exists():
             user_sources.add(ref_src)
     with transaction.commit_on_success():
         for rust in ReferenceUserStatus\
           .objects.filter(owner=self.user,
                           reference=bookmarked_ref).all():
             rust.has_been_saved = True
             rust.save()
     return bmk
Пример #6
0
def import_references_from_ns_bookmark_list(nsbmk_txt):
  """Extract bookmarks from a Netscape-style bookmark file and save
  them as Reference instances in the database.

  nsbmk_txt: a unicode string representing the full content of a
  Netscape-style bookmark file.

  Bookmarks with an empty URL are skipped (with a warning). New
  references are linked to a common "#internal-bookmark-import" source
  Reference, which is created if missing. Empty tag names (e.g. from a
  bookmark without any tag) are discarded.

  Return a dictionary mapping each reference with the BookmarkMetadata
  associated to it according to the input content (an empty dictionary
  if no bookmark could be collected). When several bookmarks share the
  same URL, the metadata of the last one is kept.
  """
  date_now = datetime.datetime.now(timezone.utc)
  # Parse the file
  collected_bmks = parse_netscape_bookmarks(nsbmk_txt)
  if not collected_bmks:
    return {}
  # Make sure that the source common to all the following import
  # exists or create it to be able to link new references to it.
  source_url = "#internal-bookmark-import"
  try:
    common_source = Reference.objects.get(url=source_url)
  except ObjectDoesNotExist:
    common_source = Reference(url=source_url,
                              title="Bookmark Import",
                              pub_date=date_now)
    common_source.save()
  new_refs = []
  ref_and_metadata = []
  # References already met during this import (new or fetched from the
  # db), by url: a URL listed several times is resolved only once.
  ref_by_url = {}
  for bmk_info in collected_bmks:
    u = bmk_info["url"]
    if not u:
      logger.warning("Skipping a bookmark that has an empty URL.")
      continue
    info = ""
    u_truncated, did_truncate = sanitize_url(u)
    if did_truncate:
      # Save the full url in info to limit the loss of information
      info = u"<WOM had to truncate the following URL: %s>" % u
      logger.warning("Found an url of length %d (>%d) "
                     "when importing Netscape-style bookmark list.",
                     len(u), URL_MAX_LENGTH)
    u = u_truncated
    t = bmk_info.get("title") or build_reference_title_from_url(u)
    if "posix_timestamp" in bmk_info:
      d = datetime.datetime\
                  .utcfromtimestamp(float(bmk_info["posix_timestamp"]))\
                  .replace(tzinfo=timezone.utc)
    else:
      d = date_now
    if u in ref_by_url:
      ref = ref_by_url[u]
    else:
      try:
        ref = Reference.objects.get(url=u)
      except ObjectDoesNotExist:
        ref = Reference(url=u, title=truncate_reference_title(t),
                        pub_date=d, description=info)
        new_refs.append(ref)
      ref_by_url[u] = ref
    # "".split(",") gives [""]: drop empty names so that a bookmark
    # without tags gets an empty set instead of a set holding "".
    tag_names = set(name for name in bmk_info.get("tags", "").split(",")
                    if name)
    meta = BookmarkMetadata(bmk_info.get("note", ""),
                            tag_names,
                            bmk_info.get("private", "0") == "0")
    ref_and_metadata.append((ref, meta))
  with transaction.commit_on_success():
    for ref in new_refs:
      ref.save()
      ref.sources.add(common_source)
  # Note: We have to wait until now to convert the list to a dict,
  # because only now will the model instances have their specific ids and
  # hashes (before that they would have looked the same for the dict).
  return dict(ref_and_metadata)