def handle(self, *args, **options):
    """
    Rebuild the stored word counts from scratch.

    Calls ``delete_all()`` on WordType, Word and Entry, recounts through
    ``Word.count_words()``, then bulk-creates the words, the entries and the
    WordType rows for both feeds and entries. Progress messages are written
    to ``self.stdout``.
    """
    # Drop the results of any previous run (WordType, then Word, then Entry)
    for model in (WordType, Word, Entry):
        model.delete_all()

    self.stdout.write('Counting words...')
    counts = Word.count_words()
    entry_counts, feed_counts, word_counts, entries_by_url = counts
    self.stdout.write('Counting complete.')

    self.stdout.write('Saving data...')

    # One Word object per counted word
    new_words = []
    for text, total in word_counts.items():
        new_words.append(Word.create(text, total))
    Word.objects.bulk_create(new_words)

    # Entries collected during counting
    Entry.objects.bulk_create(entries_by_url.values())

    # Lookups handed to Word.create_word_types: word -> id, feed id -> feed,
    # entry url -> entry (entries are re-read after the bulk insert)
    word_ids = dict(Word.objects.all().values_list('word', 'id'))
    feeds_by_id = dict(
        (feed.id, feed) for feed in Feed.objects.active_feeds()
    )
    entries_by_saved_url = dict(
        (entry.url, entry) for entry in Entry.objects.all()
    )

    # Build the WordType objects for feeds first, then for entries
    for_feeds = Word.create_word_types(feed_counts, feeds_by_id, word_ids)
    for_entries = Word.create_word_types(
        entry_counts, entries_by_saved_url, word_ids
    )
    WordType.objects.bulk_create(for_feeds + for_entries)

    self.stdout.write('Complete!')
def __add_entries(entries, feed):
    """
    Add parsed feed entries to ``feed`` without repeating them.

    For every item in ``entries``:

    * if ``feed`` already has an Entry with the same title, the item is
      skipped;
    * otherwise, if an Entry with that title exists under another feed, its
      title, description and stored file are copied into a new Entry for
      ``feed`` instead of downloading the page again;
    * otherwise ``entry.link`` is downloaded to a temporary local file,
      saved as a new Entry, and the temporary file is removed.

    Download and save failures are logged and the item is skipped, so one
    bad entry does not stop the rest (best effort).

    :param entries: iterable of objects exposing ``title``, ``description``
        and ``link`` attributes.
    :param feed: feed object the created Entry objects are attached to.
    """
    import logging

    logger = logging.getLogger(__name__)

    for entry in entries:
        same_title = Entry.objects.filter(title=entry.title)

        # exists() rather than get(): if this feed already holds several
        # entries with the title, get() would raise MultipleObjectsReturned
        # and abort the whole run instead of just skipping the item.
        if same_title.filter(feed=feed).exists():
            continue

        # Reuse an entry with this title from another feed; first() fetches
        # a single row instead of loading every match just to test for one.
        existing = same_title.first()
        if existing is not None:
            Entry(title=existing.title,
                  description=existing.description,
                  entry=existing.entry,
                  feed=feed).save()
            continue

        # Nothing to reuse: download the page and create the Entry from it.
        # NOTE(review): the title presumably comes from an external feed and
        # is used verbatim as a local file name; confirm it cannot contain
        # path separators, or sanitise it upstream.
        entry_name = entry.title + '.html'
        try:
            urlretrieve(entry.link, entry_name)
            # Binary mode keeps the downloaded bytes intact whatever the
            # page encoding; the with-block closes the handle before the
            # temporary file is removed.
            with open(entry_name, 'rb') as entry_file:
                Entry(title=entry.title,
                      description=entry.description,
                      entry=File(entry_file),
                      feed=feed).save()
        except Exception:
            # Bad link, bad file name or failed save: skip this entry but
            # leave a trace instead of swallowing the error silently.
            logger.warning('Skipping entry %r: download or save failed',
                           entry.title, exc_info=True)
        finally:
            # Always clean up, including after a partial download.
            try:
                os.remove(entry_name)
            except OSError:
                # The download never created the file; nothing to remove.
                pass