示例#1
0
 def process(self):
     """Collect the exported bookmarks and make sure the 'Imported' tag exists.

     Every entry of ``self.data['bookmarks']`` is stored in ``self.urls``
     under its URL, together with its title, the ``.naive`` datetime that
     arrow parses from ``date_added``, the single tag 'Imported' and an
     optional description. A later bookmark with the same URL overwrites
     an earlier one.

     Afterwards ``self.tags_dict['Imported']`` is pointed at the matching
     ``Tag`` object; the tag is created and committed if no tag with that
     text is returned by ``Tag.query.all()``.

     Raises:
         KeyError: if ``self.data`` has no 'bookmarks' key or a bookmark
             lacks one of the keys read below.
     """
     # Index the tags that already exist by their text.
     existing_tags = {tag.text: tag for tag in Tag.query.all()}

     for bookmark in self.data['bookmarks']:
         title = bookmark['article__title']
         url = bookmark['article__url']
         added_on = arrow.get(bookmark['date_added']).naive
         excerpt = bookmark['article__excerpt']
         # A missing excerpt stays None so no empty description is stored.
         # NOTE(review): if self.html_parser is a stdlib HTMLParser instance,
         # its unescape() method no longer exists on Python 3.9+ - confirm.
         description = (
             None if excerpt is None else self.html_parser.unescape(excerpt)
         )
         self.urls[url] = {
             'title': title,
             'added_on': added_on,
             'tags': ['Imported'],
             'description': description
         }

     if 'Imported' in existing_tags:
         self.tags_dict['Imported'] = {'obj': existing_tags['Imported']}
     else:
         # First import ever: create the marker tag and persist it.
         new_tag = Tag()
         new_tag.text = 'Imported'
         db.session.add(new_tag)
         db.session.commit()
         self.tags_dict[new_tag.text] = {'obj': new_tag}
示例#2
0
 def process(self):
     """Collect bookmarks from ``self.data`` and resolve their tags.

     For every item whose 'href' matches ``self.valid_url`` an entry is
     stored in ``self.urls`` (title, tags, added_on, description) and its
     tags are added to ``self.tags_set``. Afterwards every tag in
     ``self.tags_set`` is mapped in ``self.tags_dict`` to a ``Tag`` object,
     creating and committing the tags that ``Tag.query.all()`` did not
     return.

     Assumes each item of ``self.data`` is a dict (``.get`` is used for the
     optional keys) - confirm against the loader.

     Raises:
         KeyError: if an item has no 'href' key.
         ValueError: if 'time' is present but not in the expected format.
     """
     # Index the tags that already exist by their text.
     existing_tags = {tag.text: tag for tag in Tag.query.all()}
     created = set()  # Texts of the tags that had to be created

     for link in self.data:
         url = link['href']
         if not self.valid_url.match(url):  # Invalid URLs are skipped
             continue

         # Fall back to the URL when the bookmark has no title.
         title = link.get('description', url)

         # Build a real list (not a filter() object) so the append below
         # works on Python 3 as well; empty strings are dropped.
         tags = [tag for tag in link.get('tags', '').split(' ') if tag]
         # 'Imported' keeps track of imported bookmarks in the db.
         tags.append('Imported')

         try:
             # Pinboard stores timestamps in ISO format in UTC
             added_timestamp = datetime.datetime.strptime(
                 link['time'], '%Y-%m-%dT%H:%M:%SZ')
         except KeyError:
             # This should never happen, but just in case
             added_timestamp = datetime.datetime.utcnow()

         # Empty strings are stored as None so templates need no extra logic.
         description = link.get('extended')
         if description == '':
             description = None

         self.urls[url] = {
             'title': title,
             'tags': tags,
             'added_on': added_timestamp,
             'description': description
         }
         for tag in tags:
             self.tags_set.add(tag)

     for tag in self.tags_set:
         if tag in self.tags_dict or tag in created:
             # Already resolved, or queued for creation below.
             continue
         if tag in existing_tags:
             self.tags_dict[tag] = {'obj': existing_tags[tag]}
         else:
             created.add(tag)
             new_tag = Tag()
             new_tag.text = tag
             db.session.add(new_tag)
     db.session.commit()  # Single commit for all new tags

     # Re-read the tags and register those that were created just now.
     for tag in Tag.query.all():
         if tag.text in created:
             self.tags_dict[tag.text] = {'obj': tag}
示例#3
0
 def process(self):
     """Collect bookmarks from the anchors in ``self.soup`` and resolve tags.

     Every ``<a>`` element whose 'href' matches ``self.valid_url`` and is
     not yet a key of ``self.urls`` is stored there with its text as title,
     the single tag 'Imported' and a datetime built from its 'add_date'
     attribute (the current time when the attribute is absent). Afterwards
     every tag in ``self.tags_set`` (including 'Imported') is mapped in
     ``self.tags_dict`` to a ``Tag`` object, creating and committing the
     tags that ``Tag.query.all()`` did not return.

     Raises:
         KeyError: if an anchor has no 'href' attribute.
         ValueError: if 'add_date' is present but not an integer string.
     """
     # Index the tags that already exist by their text.
     existing_tags = {tag.text: tag for tag in Tag.query.all()}
     created = set()  # Texts of the tags that had to be created

     for link in self.soup.findAll('a'):
         url = link['href']
         # Skip invalid URLs and URLs that were already collected.
         if not self.valid_url.match(url) or url in self.urls:
             continue

         # .get() replaces the deprecated has_key() attribute check.
         raw_added = link.get('add_date')
         if raw_added is not None:
             added_timestamp = int(raw_added)
         else:
             added_timestamp = time.time()

         self.urls[url] = {
             'title': link.text,
             'tags': ['Imported'],
             'added_on': datetime.datetime.utcfromtimestamp(
                 added_timestamp)
         }

     self.tags_set.add('Imported')
     for tag in self.tags_set:
         if tag in self.tags_dict or tag in created:
             # Already resolved, or queued for creation below.
             continue
         if tag in existing_tags:
             self.tags_dict[tag] = {'obj': existing_tags[tag]}
         else:
             created.add(tag)
             new_tag = Tag()
             new_tag.text = tag
             db.session.add(new_tag)
     db.session.commit()

     # Re-read the tags and register those that were created just now.
     for tag in Tag.query.all():
         if tag.text in created:
             self.tags_dict[tag.text] = {'obj': tag}
示例#4
0
 def process(self):
     """Collect bookmarks grouped under ``<h1>`` headings and resolve tags.

     The text of every ``<h1>`` in ``self.soup`` is added to
     ``self.tags_set``. The anchors inside the next sibling ``<ol>`` whose
     'href' matches ``self.valid_url`` are stored in ``self.urls`` with the
     heading text and 'Imported' as tags and the current UTC time as
     timestamp. A heading without a following ``<ol>`` contributes its tag
     but no bookmarks. Afterwards every tag in ``self.tags_set`` is mapped
     in ``self.tags_dict`` to a ``Tag`` object, creating and committing the
     tags that ``Tag.query.all()`` did not return.

     Raises:
         KeyError: if an anchor has no 'href' attribute.
     """
     # Index the tags that already exist by their text.
     existing_tags = {tag.text: tag for tag in Tag.query.all()}
     created = set()  # Texts of the tags that had to be created

     for heading in self.soup.find_all('h1'):
         self.tags_set.add(heading.text)
         link_list = heading.find_next_sibling('ol')
         if link_list is None:
             # No list follows this heading; calling find_all() on None
             # would raise AttributeError.
             continue
         for link in link_list.find_all('a'):
             url = link['href']
             if not self.valid_url.match(url):
                 continue
             #  Thanks Instapaper for not adding timestamps
             self.urls[url] = {
                 'title': link.text,
                 'tags': [heading.text, 'Imported'],
                 'added_on': datetime.datetime.utcnow()
             }

     self.tags_set.add('Imported')
     for tag in self.tags_set:
         if tag in self.tags_dict or tag in created:
             # Already resolved, or queued for creation below.
             continue
         if tag in existing_tags:
             self.tags_dict[tag] = {'obj': existing_tags[tag]}
         else:
             created.add(tag)
             new_tag = Tag()
             new_tag.text = tag
             db.session.add(new_tag)
     db.session.commit()

     # Re-read the tags and register those that were created just now.
     for tag in Tag.query.all():
         if tag.text in created:
             self.tags_dict[tag.text] = {'obj': tag}
示例#5
0
 def process(self):
     """Collect bookmarks from the anchors in ``self.soup`` and resolve tags.

     Every ``<a>`` element is stored in ``self.urls`` under its 'href' with
     its text as title, the ``.naive`` datetime that arrow parses from the
     'time_added' attribute, and the comma-separated 'tags' attribute plus
     'Imported' as tag list. All tags are added to ``self.tags_set``.
     Afterwards every tag in ``self.tags_set`` is mapped in
     ``self.tags_dict`` to a ``Tag`` object, creating and committing the
     tags that ``Tag.query.all()`` did not return.

     Raises:
         KeyError: if an anchor lacks the 'href', 'time_added' or 'tags'
             attribute.
     """
     # Index the tags that already exist by their text.
     existing_tags = {tag.text: tag for tag in Tag.query.all()}
     created = set()  # Texts of the tags that had to be created

     for link in self.soup.find_all('a'):
         title = link.text
         url = link['href']
         added_on = arrow.get(link['time_added']).naive
         # Build a real list: a Python 3 filter() object would be exhausted
         # by the loop below and stored empty in self.urls.
         tags = [tag for tag in link['tags'].split(',') if tag]
         tags.append('Imported')
         for tag in tags:
             self.tags_set.add(tag)
         self.urls[url] = {
             'title': title,
             'tags': tags,
             'added_on': added_on
         }

     for tag in self.tags_set:
         if tag in self.tags_dict or tag in created:
             # Already resolved, or queued for creation below.
             continue
         if tag in existing_tags:
             self.tags_dict[tag] = {'obj': existing_tags[tag]}
         else:
             created.add(tag)
             new_tag = Tag()
             new_tag.text = tag
             db.session.add(new_tag)
     db.session.commit()

     # Re-read the tags and register those that were created just now.
     for tag in Tag.query.all():
         if tag.text in created:
             self.tags_dict[tag.text] = {'obj': tag}