def get_active_annotations(self):
    """
    Load a fixed set of sample annotations into the annotations database.

    Every sample is copied into an AnnotationStruct. Starred items are
    minimally required. Dashed items (highlight_text and note_text) may be
    one or both.

    AnnotationStruct properties:
        annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
        highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
        last_modification: The timestamp of the annotation
        location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time
    """
    def _stamp(year, month, day, hour, minute):
        # Samples are keyed by the value time.mktime() returns for the moment
        moment = datetime.datetime(year, month, day, hour, minute, 0)
        return time.mktime(moment.timetuple())

    # Sample annotations, indexed by timestamp. A sample may carry
    # highlight_text, note_text, or both.
    dict_of_anns = {
        _stamp(2012, 12, 4, 8, 15): {
            'book_id': 1,
            'highlight_color': 'Gray',
            'highlight_text': [
                'The first paragraph of the first highlight.',
                'The second paragaph of the first highlight.',
            ],
        },
        _stamp(2012, 12, 4, 8, 16): {
            'book_id': 1,
            'highlight_color': 'Gray',
            'highlight_text': [
                'The first paragraph of the second highlight.',
                'The second paragaph of the second highlight.',
            ],
            'note_text': ['A note added to the second highlight'],
        },
        _stamp(2012, 12, 4, 8, 17): {
            'book_id': 1,
            'highlight_color': 'Gray',
            'note_text': ['A note added to the third highlight'],
        },
        _stamp(2012, 12, 10, 9, 0): {
            'book_id': 2,
            'highlight_color': 'Gray',
            'highlight_text': [
                'The first paragraph of the first highlight.',
                'The second paragaph of the first highlight.',
            ],
        },
        _stamp(2012, 12, 10, 9, 1): {
            'book_id': 2,
            'highlight_color': 'Gray',
            'highlight_text': [
                'The first paragraph of the second highlight.',
                'The second paragaph of the second highlight.',
            ],
            'note_text': ['A note added to the second highlight'],
        },
        _stamp(2012, 12, 10, 9, 2): {
            'book_id': 2,
            'highlight_color': 'Gray',
            'note_text': ['A note added to the third highlight'],
        },
        _stamp(2012, 12, 31, 23, 59): {
            'book_id': 999,
            'highlight_color': 'Gray',
            'highlight_text': ['An orphan annotation (no book)'],
        },
    }

    self._log("%s:get_active_annotations()" % self.app_name)

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    annotations_db = self.generate_annotations_db_name(
        self.app_name_, self.opts.device_name)
    books_db = self.generate_books_db_name(
        self.app_name_, self.opts.device_name)

    # The annotations table must exist before rows are added
    self.create_annotations_table(annotations_db)

    # One progress-bar step per sample
    self.opts.pb.set_label("Getting highlights from %s" % self.app_name)
    self.opts.pb.set_value(0)
    self.opts.pb.set_maximum(len(dict_of_anns))

    # Store the samples oldest first
    for timestamp in sorted(dict_of_anns):
        sample = dict_of_anns[timestamp]
        ann_mi = AnnotationStruct()

        # Required items
        ann_mi.book_id = sample['book_id']
        ann_mi.last_modification = timestamp

        # Optional items copied verbatim
        for field in ('annotation_id', 'highlight_color'):
            if field in sample:
                setattr(ann_mi, field, sample[field])

        # Optional items stored as newline-joined paragraphs
        for field in ('highlight_text', 'note_text'):
            if field in sample:
                setattr(ann_mi, field, '\n'.join(sample[field]))

        self.add_to_annotations_db(annotations_db, ann_mi)
        self.opts.pb.increment()

        # Record this annotation's timestamp against its book
        self.update_book_last_annotation(books_db, timestamp, ann_mi.book_id)

    # Stamp the annotations table and persist
    self.update_timestamp(annotations_db)
    self.commit()
def get_active_annotations(self):
    """
    Rebuild the cached annotations table from the reader's Highlights table.

    The cache named by generate_annotations_db_name() is rebuilt only when
    self.opts.disable_caching is set or _cache_is_current() reports the
    cache stale; otherwise only a log line is written.

    A row is skipped when its book is not in self.installed_books, when it
    is flagged Deleted, when it has neither highlight text nor a note, or
    when no interior sort key can be generated from its StartXPath.
    """
    self._log("%s:get_active_annotations()" % self.app_name)

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    db_profile = self._localize_database_path(self.app_id,
                                              self.annotations_subpath)
    self.annotations_db = db_profile['path']

    # Test timestamp against cached value
    cached_db = self.generate_annotations_db_name(self.app_name_,
                                                  self.ios.device_name)
    books_db = self.generate_books_db_name(self.app_name_,
                                           self.ios.device_name)

    if (not self.opts.disable_caching and
            self._cache_is_current(db_profile['stats'], cached_db)):
        self._log(" retrieving cached annotations from %s" % cached_db)
        return

    self._log(" fetching annotations from %s on %s" %
              (self.app_name, self.ios.device_name))

    # Create the annotations table as needed
    self.create_annotations_table(cached_db)

    con = sqlite3.connect(self.annotations_db)
    try:
        # 'with con' only scopes the transaction; the connection itself is
        # closed in the finally clause below.
        with con:
            con.row_factory = sqlite3.Row
            cur = con.cursor()
            cur.execute('''SELECT * FROM Highlights
                           ORDER BY NoteDateTime
                        ''')
            rows = cur.fetchall()
            self.opts.pb.set_maximum(len(rows))

            for row in rows:
                self.opts.pb.increment()
                book_id = row[b'BookID']

                # Ignore highlights of books that are not installed
                if book_id not in self.installed_books:
                    continue

                # Ignore highlights flagged as deleted
                if row[b'Deleted'] == 1:
                    continue

                this_is_news = (self.collect_news_clippings and
                                'News' in self.get_genres(books_db, book_id))

                # Sanitize text, note to unicode
                highlight_text = re.sub('\xa0', ' ', row[b'Text'])
                highlight_text = UnicodeDammit(highlight_text).unicode
                # Drop empty lines first, then trim the surviving ones
                highlight_text = [
                    line.strip()
                    for line in highlight_text.rstrip('\n').split('\n')
                    if line
                ]

                note_text = None
                if row[b'Note']:
                    ntu = UnicodeDammit(row[b'Note']).unicode
                    note_text = ntu.rstrip('\n')

                # Populate an AnnotationStruct
                a_mi = AnnotationStruct()
                a_mi.annotation_id = row[b'UUID']
                a_mi.book_id = book_id
                a_mi.highlight_color = self.HIGHLIGHT_COLORS[row[b'Colour']]
                a_mi.highlight_text = '\n'.join(highlight_text)
                a_mi.last_modification = row[b'NoteDateTime']

                # TOC entries are looked up by the section number minus one
                section = str(int(row[b'Section']) - 1)
                try:
                    a_mi.location = self.tocs[book_id][section]
                except Exception:
                    # Best-effort fallback when no TOC entry is available
                    if this_is_news:
                        a_mi.location = self.get_title(books_db, book_id)
                    else:
                        a_mi.location = "Section %s" % row[b'Section']

                a_mi.note_text = note_text

                # If empty highlight_text and empty note_text, not a useful annotation
                if not highlight_text and not note_text:
                    continue

                # Generate location_sort
                if this_is_news:
                    a_mi.location_sort = row[b'NoteDateTime']
                else:
                    interior = self._generate_interior_location_sort(
                        row[b'StartXPath'])
                    if not interior:
                        self._log("Marvin: unable to parse xpath:")
                        self._log(row[b'StartXPath'])
                        self._log(a_mi)
                        continue

                    a_mi.location_sort = "%04d.%s.%04d" % (
                        int(row[b'Section']),
                        interior,
                        int(row[b'StartOffset']))

                # Add annotation
                self.add_to_annotations_db(cached_db, a_mi)

                # Update last_annotation in books_db
                self.update_book_last_annotation(books_db,
                                                 row[b'NoteDateTime'],
                                                 book_id)

            # Update the timestamp
            self.update_timestamp(cached_db)
            self.commit()
    finally:
        con.close()
def _process_individual_book(book):
    """
    Register one exported book and store its active highlights.

    Builds a BookStruct from the attributes of *book* (mapped through the
    enclosing scope's ``xl`` table), and returns early for a 'News' book
    when news clippings are not being collected. A book with at least one
    highlight element is added to self.books_db and
    self.annotated_book_list, and each highlight whose 'deleted' attribute
    is not '1' is stored as an AnnotationStruct in self.annotations_db,
    oldest first.

    book: an element exposing get()/find(); its 'highlights' child holds
          the highlight elements.
    """
    def _as_float(value):
        # Timestamps are attribute strings; order them numerically, not
        # lexicographically. Unparseable or missing values sort first.
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    book_mi = BookStruct()
    book_mi['reader_app'] = self.app_name
    book_mi['cid'] = None
    for md in xl:
        book_mi[xl[md]] = book.get(md)
    book_mi['active'] = True
    book_mi['annotations'] = 0
    subjects = book.find('subjects')
    if subjects is not None:
        sl = [s.text for s in subjects]
        book_mi['genre'] = ', '.join(sl)

    this_is_news = False
    if 'News' in book_mi['genre']:
        if not self.collect_news_clippings:
            return
        this_is_news = True

    # Get the last update, count active annotations
    hls = book.find('highlights')
    if hls is None:
        # No highlights element at all: same as a book with no highlights
        hls = []

    last_update = 0.0
    for hl in hls:
        last_update = max(last_update, _as_float(hl.get('datetime')))
        if hl.get('deleted') == '0':
            book_mi['annotations'] += 1
    book_mi['last_update'] = last_update

    # Get the library cid, confidence
    toc_entries = None
    if this_is_news:
        cid = self.news_clippings_cid
        confidence = 5
    else:
        cid, confidence = self.parent.generate_confidence(book_mi)

    if confidence >= 2:
        toc_entries = self._get_epub_toc(cid=cid)

    # Only books with highlights are recorded
    if not len(hls):
        return

    # Add annotated book to the db, master_list
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # Collect the active annotations for this book, keyed by timestamp
    fields = ['text', 'note', 'color', 'key', 'deleted',
              'section', 'startx', 'startoffset']
    highlights = {}
    for hl in hls:
        if hl.get('deleted') == '1':
            continue
        highlights[hl.get('datetime')] = {md: hl.get(md) for md in fields}

    for stamp in sorted(highlights, key=_as_float):
        entry = highlights[stamp]
        highlight_text = entry['text']
        note_text = entry['note']

        # Populate an AnnotationStruct
        a_mi = AnnotationStruct()
        a_mi.annotation_id = entry['key']
        a_mi.book_id = book_mi['book_id']
        a_mi.highlight_color = self.HIGHLIGHT_COLORS[int(entry['color'])]
        a_mi.highlight_text = highlight_text
        a_mi.last_modification = stamp

        try:
            # TOC entries are looked up by the section number minus one
            section = str(int(entry['section']) - 1)
            a_mi.location = toc_entries[section]
        except Exception:
            # Best-effort fallback: no TOC (toc_entries is None) or no entry
            if this_is_news:
                a_mi.location = book_mi['title']
            else:
                a_mi.location = "Section %s" % entry['section']

        a_mi.note_text = note_text

        # If empty highlight_text and empty note_text, not a useful annotation.
        # Either attribute may be absent (None), so guard before strip().
        if (not (highlight_text or '').strip() and
                not (note_text or '').strip()):
            continue

        # Generate location_sort
        if this_is_news:
            a_mi.location_sort = stamp
        else:
            interior = self._generate_interior_location_sort(entry['startx'])
            if not interior:
                self._log("Marvin: unable to parse xpath:")
                self._log(" %s" % entry['startx'])
                self._log(a_mi)
                continue

            a_mi.location_sort = "%04d.%s.%04d" % (
                int(entry['section']),
                interior,
                int(entry['startoffset']))

        self.add_to_annotations_db(self.annotations_db, a_mi)
        self.update_book_last_annotation(self.books_db, stamp,
                                         book_mi['book_id'])

    # Update the timestamps
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()
def parse_exported_highlights(self, raw):
    """
    Attach the annotations held in self.highlights to the book currently
    selected in the calibre library view.

    A BookStruct describing the selected book is built first. Starred
    items are minimally required.

    BookStruct properties:
        *active: [True|False]
        *author: "John Smith"
        author_sort: (if known)
        *book_id: an int uniquely identifying the book.
                  Highlights are associated with books through book_id
        genre: "Fiction" (if known)
        *title: "The Story of John Smith"
        title_sort: "Story of John Smith, The" (if known)
        uuid: Calibre's uuid for this book, if known

    One AnnotationStruct is then built per highlight. Starred items are
    minimally required. Dashed items (highlight_text and note_text) may be
    one or both.

    AnnotationStruct properties:
        annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
        highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
        last_modification: The timestamp of the annotation
        location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time

    Returns True once everything has been stored.
    """
    self._log("%s:parse_exported_highlight()" % self.app_name)

    # Imported annotations and books live in their own tables
    self.annotations_db = "%s_imported_annotations" % self.app_name_
    self.create_annotations_table(self.annotations_db)
    self.books_db = "%s_imported_books" % self.app_name_
    self.create_books_table(self.books_db)

    self.annotated_book_list = []
    self.selected_books = None

    # Metadata of the book selected in the library view
    gui = self.opts.gui
    row = gui.library_view.currentIndex()
    book_id = gui.library_view.model().id(row)
    db = gui.current_db
    mi = db.get_metadata(book_id, index_is_id=True)

    # Author and title are the minimum a BookStruct needs
    title = "A Book With Some Exported Annotations"
    author = "John Smith"

    book_mi = BookStruct()
    book_mi.active = True
    book_mi.author = author
    book_mi.book_id = mi.id
    book_mi.title = title
    book_mi.uuid = None
    book_mi.last_update = time.mktime(time.localtime())
    book_mi.reader_app = self.app_name
    book_mi.cid = mi.id
    book_mi.annotations = len(self.highlights)

    # Store the highlights oldest first
    for timestamp in sorted(self.highlights.keys()):
        entry = self.highlights[timestamp]

        # The book's last update tracks the annotation being stored
        book_mi.last_update = timestamp

        ann_mi = AnnotationStruct()

        # Required items
        ann_mi.book_id = book_mi['book_id']
        ann_mi.last_modification = timestamp

        # Optional items copied verbatim
        for field in ('annotation_id', 'highlight_color'):
            if field in entry:
                setattr(ann_mi, field, entry[field])

        # Optional items stored as newline-joined paragraphs
        for field in ('highlight_text', 'note_text'):
            if field in entry:
                setattr(ann_mi, field, '\n'.join(entry[field]))

        self.add_to_annotations_db(self.annotations_db, ann_mi)
        self.opts.pb.increment()

        # Record this annotation's timestamp against its book
        self.update_book_last_annotation(self.books_db, timestamp,
                                         ann_mi.book_id)

    # The book is recorded after its annotations
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # Stamp both tables and persist
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()

    return True
def get_active_annotations(self):
    """
    Store the annotations gathered by _parse_my_clippings() in the
    annotations database.

    One AnnotationStruct is built per entry of self.active_annotations.
    Starred items are minimally required. Dashed items (highlight_text and
    note_text) may be one or both.

    AnnotationStruct properties:
        annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
        highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
        last_modification: The timestamp of the annotation
        location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time
    """
    self._log("%s:get_active_annotations()" % self.app_name)

    self.active_annotations = {}

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    # Don't change the template of the _db strings
    self.annotations_db = self.generate_annotations_db_name(
        self.app_name_, self.opts.device_name)
    self.books_db = self.generate_books_db_name(
        self.app_name_, self.opts.device_name)

    # The annotations table must exist before rows are added
    self.create_annotations_table(self.annotations_db)

    # Parse MyClippings.txt for entries matching installed_books
    self._parse_my_clippings()

    # One progress-bar step per annotation
    self.opts.pb.set_label("Getting highlights from %s" % self.app_name)
    self.opts.pb.set_value(0)
    self.opts.pb.show()
    self.opts.pb.set_maximum(len(self.active_annotations))

    # Store the annotations oldest first
    for timestamp in sorted(self.active_annotations):
        clipping = self.active_annotations[timestamp]
        ann_mi = AnnotationStruct()

        # Required items
        ann_mi.book_id = clipping['book_id']
        ann_mi.last_modification = timestamp

        this_is_news = (self.collect_news_clippings and
                        'News' in self.get_genres(self.books_db,
                                                  ann_mi.book_id))

        # Optional items copied verbatim
        for field in ('annotation_id', 'highlight_color'):
            if field in clipping:
                setattr(ann_mi, field, clipping[field])

        if 'highlight_text' in clipping:
            ann_mi.highlight_text = '\n'.join(clipping['highlight_text'])

        if this_is_news:
            # News clippings are located by book title and sorted by time
            ann_mi.location = self.get_title(self.books_db, ann_mi.book_id)
            ann_mi.location_sort = timestamp
        else:
            for field in ('location', 'location_sort'):
                if field in clipping:
                    setattr(ann_mi, field, clipping[field])

        if 'note_text' in clipping:
            ann_mi.note_text = '\n'.join(clipping['note_text'])

        self.add_to_annotations_db(self.annotations_db, ann_mi)
        self.opts.pb.increment()

        # Record this annotation's timestamp against its book
        self.update_book_last_annotation(self.books_db, timestamp,
                                         ann_mi.book_id)

    self.opts.pb.hide()

    # Stamp the annotations table and persist
    self.update_timestamp(self.annotations_db)
    self.commit()
def get_active_annotations(self):
    """
    Rebuild the cached annotations table from the reader's
    book_annotations table.

    The cache named by generate_annotations_db_name() is rebuilt only when
    self.opts.disable_caching is set or _cache_is_current() reports the
    cache stale; otherwise only a log line is written.

    One AnnotationStruct is built per row whose book is in
    self.installed_books. Starred items are minimally required. Dashed
    items (highlight_text and note_text) may be one or both.

    AnnotationStruct properties:
        annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
        highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
        last_modification: The timestamp of the annotation
        location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time
    """
    self._log("%s:get_active_annotations()" % self.app_name)

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    db_profile = self._localize_database_path(self.app_id,
                                              self.annotations_subpath)
    self.annotations_db = db_profile['path']

    # Test timestamp against cached value
    cached_db = self.generate_annotations_db_name(self.app_name_,
                                                  self.ios.device_name)
    books_db = self.generate_books_db_name(self.app_name_,
                                           self.ios.device_name)

    if (not self.opts.disable_caching and
            self._cache_is_current(db_profile['stats'], cached_db)):
        self._log(" retrieving cached annotations from %s" % cached_db)
        return

    self._log(" fetching annotations from %s on %s" %
              (self.app_name, self.ios.device_name))

    # Create the annotations table as needed
    self.create_annotations_table(cached_db)

    con = sqlite3.connect(self.annotations_db)
    try:
        # 'with con' only scopes the transaction; the connection itself is
        # closed in the finally clause below.
        with con:
            con.row_factory = sqlite3.Row
            cur = con.cursor()
            cur.execute('''SELECT
                            book_oid,
                            last_access,
                            book_annotations.location,
                            book_annotations.book_position,
                            note,
                            book_annotations.oid as ba_oid
                           FROM book_annotations
                           JOIN book ON book.oid = book_annotations.book_oid
                           ORDER BY book_annotations.book_position
                        ''')
            rows = cur.fetchall()
            self.opts.pb.set_maximum(len(rows))

            # Annotations keyed by their (de-duplicated) last_access value
            annotations = {}
            for row in rows:
                self.opts.pb.increment()
                book_id = row[b'book_oid']
                if book_id not in self.installed_books:
                    continue

                full_annotation = row[b'note']
                if full_annotation is None:
                    # Nothing to split into highlight/note
                    continue

                # Annotations are quoted. Anything afterwards is a note.
                # Assuming that the user hasn't edited the opening/closing
                # quotes, a sequence of '"\n' is a valid split point.
                highlight_text = None
                note_text = None
                if (full_annotation.startswith('"') and
                        full_annotation.endswith('"')):
                    # Highlight only - strip opening/closing quotes
                    highlight_text = [full_annotation[1:-1]]
                elif '"\n' in full_annotation:
                    # Presumed to be a hybrid highlight/note. Split at the
                    # first closing quote/LF only, so a note that itself
                    # contains '"\n' is kept whole.
                    quoted, _sep, remainder = full_annotation.partition('"\n')
                    highlight_text = [quoted[1:]]
                    note_text = remainder.split('\n')
                else:
                    # User manually removed the quotes, assume it's just a note
                    note_text = full_annotation.split('\n')

                # Populate an AnnotationStruct
                a_mi = AnnotationStruct()
                a_mi.annotation_id = row[b'ba_oid']
                a_mi.book_id = book_id
                a_mi.epubcfi = row[b'location']
                a_mi.highlight_color = 'Yellow'
                if highlight_text:
                    a_mi.highlight_text = '\n'.join(highlight_text)
                if note_text:
                    a_mi.note_text = '\n'.join(note_text)

                section = self._get_spine_index(a_mi.epubcfi)
                try:
                    a_mi.location = self.tocs[book_id]["%.0f" % (section)]
                except Exception:
                    # Best-effort fallback when no TOC entry is available
                    a_mi.location = "Section %d" % section

                a_mi.location_sort = row[b'book_position']

                # Stanza doesn't timestamp individual annotations
                # Space them 1 second apart
                timestamp = row[b'last_access']
                while timestamp in annotations:
                    timestamp += 1
                a_mi.last_modification = timestamp + self.NSTimeIntervalSince1970
                annotations[timestamp] = a_mi

            # Store oldest first, so that the last value recorded for each
            # book is that book's most recent annotation.
            for timestamp in sorted(annotations):
                a_mi = annotations[timestamp]
                self.add_to_annotations_db(cached_db, a_mi)

                # Update last_annotation in books_db for the book that owns
                # this annotation.
                # NOTE(review): the value passed is the raw last_access,
                # without the NSTimeIntervalSince1970 offset applied to
                # last_modification - confirm which one books_db expects.
                self.update_book_last_annotation(books_db, timestamp,
                                                 a_mi.book_id)

            if annotations:
                self.update_timestamp(cached_db)
                self.commit()
    finally:
        con.close()
def parse_exported_highlights(self, raw, log_failure=True):
    """
    Extract highlights from pasted Annotation summary email and attach them
    to the book currently selected in the library view.

    raw: the pasted summary text. It must start with a 'File: <title>' line
         and contain '--- Page <n> ---' markers; parsing stops at the
         '(report generated by GoodReader)' line or at the end of the text.
    log_failure: when True, a parsing failure also logs the raw summary and
         shows a warning MessageBox.

    Return True if no problems
    Return False if error
    """
    # Create the annotations, books table as needed
    self.annotations_db = "%s_imported_annotations" % self.app_name_
    self.create_annotations_table(self.annotations_db)
    self.books_db = "%s_imported_books" % self.app_name_
    self.create_books_table(self.books_db)

    self.annotated_book_list = []
    self.selected_books = None

    self._log("raw highlights: {0}".format(raw))

    # Generate the book metadata from the selected book
    row = self.opts.gui.library_view.currentIndex()
    book_id = self.opts.gui.library_view.model().id(row)
    db = self.opts.gui.current_db
    mi = db.get_metadata(book_id, index_is_id=True)

    # Grab the title from the front of raw
    # Any exception raised while parsing (no title match, running off the
    # end of the line list, ...) is handled by the except clause below.
    try:
        title = re.match(r'(?m)File: (?P<title>.*)$', raw).group('title')
        self._log("title='{0}".format(title))

        # Populate a BookStruct
        book_mi = BookStruct()
        book_mi.active = True
        book_mi.author = 'Unknown'
        book_mi.book_id = mi.id
        book_mi.title = title
        book_mi.uuid = None
        book_mi.last_update = time.mktime(time.localtime())
        book_mi.reader_app = self.app_name
        book_mi.cid = mi.id

        # The summary is walked line by line; i is the index of 'line'
        gr_annotations = raw.split('\n')
        num_lines = len(gr_annotations)
        highlights = {}

        # Find the first annotation
        i = 0
        line = gr_annotations[i]
        self._log("Looking for Page: Line number={0} line='{1}'".format(
            i, line))
        # Advance to the first page marker; an IndexError past the last
        # line lands in the except clause below
        while not line.startswith('--- Page'):
            self._log(" unable to parse GoodReader Annotation summary")
            i += 1
            line = gr_annotations[i]
            self._log(
                "Looking for Page: Line number={0} line='{1}'".format(
                    i, line))

        # One pass of this loop per line at page-marker level
        while i < num_lines and not line.startswith(
                '(report generated by GoodReader)'):
            # Extract the page number
            page_num = re.search('--- (Page \w+) ---', line)
            self._log("regex result: page_num={0}".format(page_num))
            if page_num:
                page_num = page_num.group(1)
                self._log("page_num={0}".format(page_num))

                # Extract the highlight
                i += 1
                line = gr_annotations[i]
                self._log(
                    "Looking for annotation start: Line number={0} line='{1}'"
                    .format(i, line))

                # Scan forward to the first line that starts with one of
                # ANNOTATION_TYPES, stepping over SKIP_TYPES sections
                prefix = None
                while True:
                    prefix = re.search(
                        '^(?P<ann_type>{0})'.format(
                            '|'.join(self.ANNOTATION_TYPES +
                                     self.SKIP_TYPES)), line)
                    self._log("Searched for prefix={0}".format(prefix))
                    if prefix and prefix.group(
                            'ann_type') in self.SKIP_TYPES:
                        i += 1
                        line = gr_annotations[i]
                        self._log(
                            "Looking for annotation start: Line number={0} line='{1}'"
                            .format(i, line))
                        # Discard lines until the next ANNOTATION_TYPES line
                        while not re.search(
                                '^(?P<ann_type>{0})'.format('|'.join(
                                    self.ANNOTATION_TYPES)), line):
                            i += 1
                            line = gr_annotations[i]
                            self._log(
                                "Looking for annotation start after a SKIP type: Line number={0} line='{1}'"
                                .format(i, line))
                        continue
                    elif prefix:
                        self._log(
                            "Have annotation start: Line number={0} line='{1}' prefix={2}"
                            .format(i, line, prefix))
                        break
                    else:
                        i += 1
                        line = gr_annotations[i]
                        self._log(
                            "Looking for annotation start 2: Line number={0} line='{1}'"
                            .format(i, line))

                annotation = self._extract_highlight(
                    line, prefix.group('ann_type'))
                annotation.page_num = page_num
                self._log(
                    "Started annotation: page_num={0} annotation='{1}'".
                    format(page_num, annotation))

                # Get the annotation(s)
                i += 1
                line = gr_annotations[i]
                self._log(
                    "Reading annotation text 1: Line number={0} line='{1}'"
                    .format(i, line))

                # ann accumulates the text lines of the current annotation
                ann = ''
                while i < num_lines \
                    and not line.startswith('--- Page') \
                    and not line.startswith('(report generated by GoodReader)'):

                    # Blank lines are not added to the annotation text
                    if line:
                        prefix = re.search(
                            '^(?P<ann_type>{0})'.format(
                                '|'.join(self.ANNOTATION_TYPES +
                                         self.SKIP_TYPES)), line)
                        if prefix and prefix.group(
                                'ann_type') in self.SKIP_TYPES:
                            # Continue until next ann_type
                            i += 1
                            line = gr_annotations[i]
                            while not re.search(
                                    '^(?P<ann_type>{0})'.format('|'.join(
                                        self.ANNOTATION_TYPES)), line):
                                i += 1
                                if i == num_lines:
                                    break
                                line = gr_annotations[i]
                            continue
                        elif prefix:
                            # Additional highlight on the same page
                            # write current annotation, start new annotation
                            self._store_annotation(highlights, annotation)
                            annotation = self._extract_highlight(
                                line, prefix.group('ann_type'))
                            annotation.page_num = page_num
                            annotation.ann_type = prefix.group('ann_type')
                            ann = ''
                            i += 1
                            line = gr_annotations[i]
                            continue

                        if not ann:
                            ann = line
                        else:
                            ann += '\n' + line
                    i += 1
                    line = gr_annotations[i]
                    annotation.ann = ann

                # Back up so that the next line is '--- Page' or '(report generated'
                i -= 1
                self._store_annotation(highlights, annotation)

            i += 1
            if i == num_lines:
                break
            line = gr_annotations[i]
    except Exception as e:
        import traceback
        self._log("Exception parsing GoodReader Annotation summary: %s" % e)
        traceback.print_exc()
        if log_failure:
            # Log the full pasted text between two ruler lines, then warn
            # the user through a dialog
            self._log(" unable to parse GoodReader Annotation summary")
            self._log("{:~^80}".format(" Imported Annotation summary "))
            self._log(raw)
            self._log(
                "{:~^80}".format(" end imported Annotations summary "))
            import traceback
            traceback.print_exc()
            msg = ('Unable to parse Annotation summary from %s. ' % self.app_name +
                   'Paste entire contents of emailed summary.')
            MessageBox(MessageBox.WARNING,
                       'Error importing annotations',
                       msg,
                       show_copy_button=False,
                       parent=self.opts.gui).exec_()
            self._log_location("WARNING: %s" % msg)
        return False

    # Finalize book_mi
    book_mi.annotations = len(highlights)
    # Add book to books_db
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # Store the parsed highlights in ascending key order
    sorted_keys = sorted(list(highlights.keys()))
    for dt in sorted_keys:
        highlight_text = None
        if 'text' in highlights[dt]:
            highlight_text = highlights[dt]['text']
        note_text = None
        if 'note' in highlights[dt]:
            note_text = highlights[dt]['note']

        # Populate an AnnotationStruct
        a_mi = AnnotationStruct()
        a_mi.annotation_id = dt
        a_mi.book_id = book_mi['book_id']
        a_mi.highlight_color = highlights[dt]['color']
        a_mi.highlight_text = highlight_text
        a_mi.location = highlights[dt]['page']
        a_mi.last_modification = dt
        a_mi.note_text = note_text

        # Location sort
        # The sort key is "<arabic page>.<roman page>", each zero-padded to
        # five digits; roman-numeral pages get 0 as their arabic part.
        # NOTE(review): a location the pattern does not match makes
        # re.match() return None, and .group() then raises outside the
        # try block above - confirm the page formats _store_annotation
        # can produce.
        page_literal = re.match(r'^Page (?P<page>[0-9ivx]+).*$',
                                a_mi.location).group('page')
        if re.match('[IXVL]', page_literal.upper()):
            whole = 0
            decimal = self._roman_to_int(page_literal)
        else:
            whole = int(page_literal)
            decimal = 0
        a_mi.location_sort = "%05d.%05d" % (whole, decimal)

        # Add annotation
        self.add_to_annotations_db(self.annotations_db, a_mi)
        self.update_book_last_annotation(self.books_db, dt,
                                         book_mi['book_id'])

    # Update the timestamp
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()

    return True
def _process_individual_book(book):
    """
    Register one exported book and store its active highlights.

    Builds a BookStruct from the attributes of *book* (mapped through the
    enclosing scope's ``xl`` table), and returns early for a 'News' book
    when news clippings are not being collected. A book with at least one
    highlight element is added to self.books_db and
    self.annotated_book_list, and each highlight whose 'deleted' attribute
    is not '1' is stored as an AnnotationStruct in self.annotations_db,
    oldest first.

    book: an element exposing get()/find(); its 'highlights' child holds
          the highlight elements.
    """
    def _as_float(value):
        # Timestamps are attribute strings; order them numerically, not
        # lexicographically. Unparseable or missing values sort first.
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    book_mi = BookStruct()
    book_mi['reader_app'] = self.app_name
    book_mi['cid'] = None
    for md in xl:
        book_mi[xl[md]] = book.get(md)
    book_mi['active'] = True
    book_mi['annotations'] = 0
    subjects = book.find('subjects')
    if subjects is not None:
        sl = [s.text for s in subjects]
        book_mi['genre'] = ', '.join(sl)

    this_is_news = False
    if 'News' in book_mi['genre']:
        if not self.collect_news_clippings:
            return
        this_is_news = True

    # Get the last update, count active annotations
    hls = book.find('highlights')
    if hls is None:
        # No highlights element at all: same as a book with no highlights
        hls = []

    last_update = 0.0
    for hl in hls:
        last_update = max(last_update, _as_float(hl.get('datetime')))
        if hl.get('deleted') == '0':
            book_mi['annotations'] += 1
    book_mi['last_update'] = last_update

    # Get the library cid, confidence
    toc_entries = None
    if this_is_news:
        cid = self.news_clippings_cid
        confidence = 5
    else:
        cid, confidence = self.parent.generate_confidence(book_mi)

    if confidence >= 2:
        toc_entries = self._get_epub_toc(cid=cid)

    # Only books with highlights are recorded
    if not len(hls):
        return

    # Add annotated book to the db, master_list
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # Collect the active annotations for this book, keyed by timestamp
    fields = ['text', 'note', 'color', 'key', 'deleted',
              'section', 'startx', 'startoffset']
    highlights = {}
    for hl in hls:
        if hl.get('deleted') == '1':
            continue
        highlights[hl.get('datetime')] = {md: hl.get(md) for md in fields}

    for stamp in sorted(highlights, key=_as_float):
        entry = highlights[stamp]
        highlight_text = entry['text']
        note_text = entry['note']

        # Populate an AnnotationStruct
        a_mi = AnnotationStruct()
        a_mi.annotation_id = entry['key']
        a_mi.book_id = book_mi['book_id']
        a_mi.highlight_color = self.HIGHLIGHT_COLORS[int(entry['color'])]
        a_mi.highlight_text = highlight_text
        a_mi.last_modification = stamp

        try:
            # TOC entries are looked up by the section number minus one
            section = str(int(entry['section']) - 1)
            a_mi.location = toc_entries[section]
        except Exception:
            # Best-effort fallback: no TOC (toc_entries is None) or no entry
            if this_is_news:
                a_mi.location = book_mi['title']
            else:
                a_mi.location = "Section %s" % entry['section']

        a_mi.note_text = note_text

        # If empty highlight_text and empty note_text, not a useful annotation.
        # Either attribute may be absent (None), so guard before strip().
        if (not (highlight_text or '').strip() and
                not (note_text or '').strip()):
            continue

        # Generate location_sort
        if this_is_news:
            a_mi.location_sort = stamp
        else:
            interior = self._generate_interior_location_sort(entry['startx'])
            if not interior:
                self._log("Marvin: unable to parse xpath:")
                self._log(" %s" % entry['startx'])
                self._log(a_mi)
                continue

            a_mi.location_sort = "%04d.%s.%04d" % (
                int(entry['section']),
                interior,
                int(entry['startoffset']))

        self.add_to_annotations_db(self.annotations_db, a_mi)
        self.update_book_last_annotation(self.books_db, stamp,
                                         book_mi['book_id'])

    # Update the timestamps
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()
def get_active_annotations(self):
    '''
    Store a fixed set of sample annotations in the annotations table.

    For each annotation, construct an AnnotationStruct object with the
    highlight's metadata. Starred items are minimally required. Dashed items
    (highlight_text and note_text) may be one or both.
    AnnotationStruct properties:
        annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
        highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
        last_modification: The timestamp of the annotation
        location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time
    '''
    # Sample annotations, indexed by timestamp. Note that annotations may have
    # highlight_text, note_text, or both.
    dict_of_anns = {}

    ts = datetime.datetime(2012, 12, 4, 8, 15, 0)
    dict_of_anns[time.mktime(ts.timetuple())] = {
        'book_id': 1,
        'highlight_color': 'Gray',
        'highlight_text': ['The first paragraph of the first highlight.',
                           'The second paragaph of the first highlight.'],
    }
    ts = ts.replace(minute=16)
    dict_of_anns[time.mktime(ts.timetuple())] = {
        'book_id': 1,
        'highlight_color': 'Gray',
        'highlight_text': ['The first paragraph of the second highlight.',
                           'The second paragaph of the second highlight.'],
        'note_text': ['A note added to the second highlight'],
    }
    ts = ts.replace(minute=17)
    dict_of_anns[time.mktime(ts.timetuple())] = {
        'book_id': 1,
        'highlight_color': 'Gray',
        'note_text': ['A note added to the third highlight'],
    }

    ts = datetime.datetime(2012, 12, 10, 9, 0, 0)
    dict_of_anns[time.mktime(ts.timetuple())] = {
        'book_id': 2,
        'highlight_color': 'Gray',
        'highlight_text': ['The first paragraph of the first highlight.',
                           'The second paragaph of the first highlight.'],
    }
    ts = ts.replace(minute=1)
    dict_of_anns[time.mktime(ts.timetuple())] = {
        'book_id': 2,
        'highlight_color': 'Gray',
        'highlight_text': ['The first paragraph of the second highlight.',
                           'The second paragaph of the second highlight.'],
        'note_text': ['A note added to the second highlight'],
    }
    ts = ts.replace(minute=2)
    dict_of_anns[time.mktime(ts.timetuple())] = {
        'book_id': 2,
        'highlight_color': 'Gray',
        'note_text': ['A note added to the third highlight'],
    }

    ts = datetime.datetime(2012, 12, 31, 23, 59, 0)
    dict_of_anns[time.mktime(ts.timetuple())] = {
        'book_id': 999,
        'highlight_color': 'Gray',
        'highlight_text': ['An orphan annotation (no book)'],
    }

    self._log("%s:get_active_annotations()" % self.app_name)
    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    annotations_db = self.generate_annotations_db_name(self.app_name_, self.opts.device_name)
    books_db = self.generate_books_db_name(self.app_name_, self.opts.device_name)

    # Create the annotations table
    self.create_annotations_table(annotations_db)

    # Initialize the progress bar
    self.opts.pb.set_label("Getting highlights from %s" % self.app_name)
    self.opts.pb.set_value(0)
    self.opts.pb.set_maximum(len(dict_of_anns))

    # Add annotations to the database, oldest first.
    # sorted(dict) iterates the keys on both Python 2 and Python 3.
    for timestamp in sorted(dict_of_anns):
        ann = dict_of_anns[timestamp]

        # Populate an AnnotationStruct with available data
        ann_mi = AnnotationStruct()

        # Required items
        ann_mi.book_id = ann['book_id']
        ann_mi.last_modification = timestamp

        # Optional items
        if 'annotation_id' in ann:
            ann_mi.annotation_id = ann['annotation_id']
        if 'highlight_color' in ann:
            ann_mi.highlight_color = ann['highlight_color']
        if 'highlight_text' in ann:
            ann_mi.highlight_text = '\n'.join(ann['highlight_text'])
        if 'note_text' in ann:
            ann_mi.note_text = '\n'.join(ann['note_text'])

        # Add annotation to annotations_db
        self.add_to_annotations_db(annotations_db, ann_mi)

        # Increment the progress bar
        self.opts.pb.increment()

        # Update last_annotation in books_db
        self.update_book_last_annotation(books_db, timestamp, ann_mi.book_id)

    # Update the timestamp
    self.update_timestamp(annotations_db)
    self.commit()
def get_active_annotations(self):
    """
    Fetch active iBooks annotations from AEAnnotation_*.sqlite

    When caching is disabled or the cached copy is stale, every non-deleted
    row with ZANNOTATIONTYPE = 2 belonging to a book in self.installed_books
    is converted to an AnnotationStruct and added to the cached annotations
    table. Otherwise only a log line is written.
    """
    # Local import: sqlite3's own context manager only scopes a transaction,
    # it does not close the connection.
    from contextlib import closing

    self._log("%s:get_active_annotations()" % self.app_name)

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    db_profile = self._localize_database_path(self.app_id, self.annotations_subpath)
    self.annotations_db = db_profile['path']

    # Test timestamp against cached value
    cached_db = self.generate_annotations_db_name(self.app_name_, self.ios.device_name)
    books_db = self.generate_books_db_name(self.app_name_, self.ios.device_name)

    if not self.opts.disable_caching and self._cache_is_current(db_profile['stats'], cached_db):
        self._log(" retrieving cached annotations from %s" % cached_db)
        return

    self._log(" fetching annotations from %s on %s" % (self.app_name, self.ios.device_name))

    # Create the annotations table as needed
    self.create_annotations_table(cached_db)

    with closing(sqlite3.connect(self.annotations_db)) as con:
        con.row_factory = sqlite3.Row
        cur = con.cursor()
        cur.execute('''SELECT
                        ZANNOTATIONASSETID,
                        ZANNOTATIONLOCATION,
                        ZANNOTATIONMODIFICATIONDATE,
                        ZANNOTATIONNOTE,
                        ZANNOTATIONSELECTEDTEXT,
                        ZANNOTATIONSTYLE,
                        ZANNOTATIONUUID
                       FROM ZAEANNOTATION
                       WHERE ZANNOTATIONDELETED = 0 and ZANNOTATIONTYPE = 2
                       ORDER BY ZANNOTATIONMODIFICATIONDATE
                    ''')
        rows = cur.fetchall()
        self.opts.pb.set_maximum(len(rows))

        for row in rows:
            self.opts.pb.increment()
            book_id = row[b'ZANNOTATIONASSETID']
            if book_id not in self.installed_books:
                continue

            this_is_news = (self.collect_news_clippings and
                            'News' in self.get_genres(books_db, book_id))
            # The stored date is offset by NSTimeIntervalSince1970 before use
            modified = row[b'ZANNOTATIONMODIFICATIONDATE'] + self.NSTimeIntervalSince1970

            # Sanitize text, note to unicode.
            # The selected text may be NULL; treat that as empty.
            highlight_text = re.sub('\xa0', ' ', row[b'ZANNOTATIONSELECTEDTEXT'] or '')
            highlight_text = UnicodeDammit(highlight_text).unicode
            # Drop empty lines, then trim the remaining ones
            highlight_text = [line.strip() for line in highlight_text.split('\n') if line]

            note_text = None
            if row[b'ZANNOTATIONNOTE']:
                note_text = UnicodeDammit(row[b'ZANNOTATIONNOTE']).unicode
                # Only the first line of the note is kept
                note_text = note_text.rstrip('\n').split('\n')[0]

            # Populate an AnnotationStruct
            a_mi = AnnotationStruct()
            a_mi.annotation_id = row[b'ZANNOTATIONUUID']
            a_mi.book_id = book_id
            a_mi.epubcfi = row[b'ZANNOTATIONLOCATION']
            a_mi.highlight_color = self.HIGHLIGHT_COLORS[row[b'ZANNOTATIONSTYLE']]
            a_mi.highlight_text = '\n'.join(highlight_text)
            a_mi.last_modification = modified

            if a_mi.epubcfi:
                section = self._get_spine_index(a_mi.epubcfi)
                try:
                    a_mi.location = self.tocs[book_id]["%.0f" % (section - 1)]
                except Exception:
                    # No TOC entry for this section: fall back to its number
                    a_mi.location = "Section %d" % section
                if this_is_news:
                    a_mi.location_sort = a_mi.last_modification
                else:
                    a_mi.location_sort = self._generate_location_sort(a_mi.epubcfi)
            elif this_is_news:
                a_mi.location = self.get_title(books_db, book_id)
                a_mi.location_sort = a_mi.last_modification

            a_mi.note_text = note_text

            # Add annotation
            self.add_to_annotations_db(cached_db, a_mi)

            # Update last_annotation in books_db
            self.update_book_last_annotation(books_db, modified, book_id)

    self.update_timestamp(cached_db)
    self.commit()
def get_active_annotations(self):
    """
    Fetch active annotations from the reader app's Highlights table.

    When caching is disabled or the cached copy is stale, every row of
    Highlights that is not flagged Deleted and belongs to a book in
    self.installed_books is converted to an AnnotationStruct and added to
    the cached annotations table. Otherwise only a log line is written.
    """
    # Local import: sqlite3's own context manager only scopes a transaction,
    # it does not close the connection.
    from contextlib import closing

    self._log("%s:get_active_annotations()" % self.app_name)

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    db_profile = self._localize_database_path(self.app_id, self.annotations_subpath)
    self.annotations_db = db_profile['path']

    # Test timestamp against cached value
    cached_db = self.generate_annotations_db_name(self.app_name_, self.ios.device_name)
    books_db = self.generate_books_db_name(self.app_name_, self.ios.device_name)

    if not self.opts.disable_caching and self._cache_is_current(db_profile['stats'], cached_db):
        self._log(" retrieving cached annotations from %s" % cached_db)
        return

    self._log(" fetching annotations from %s on %s" % (self.app_name, self.ios.device_name))

    # Create the annotations table as needed
    self.create_annotations_table(cached_db)

    with closing(sqlite3.connect(self.annotations_db)) as con:
        con.row_factory = sqlite3.Row
        cur = con.cursor()
        cur.execute('''SELECT * FROM Highlights
                       ORDER BY NoteDateTime
                    ''')
        rows = cur.fetchall()
        self.opts.pb.set_maximum(len(rows))

        for row in rows:
            self.opts.pb.increment()
            book_id = row[b'BookID']

            # Skip highlights of books that are not installed, and deleted ones
            if book_id not in self.installed_books:
                continue
            if row[b'Deleted'] == 1:
                continue

            this_is_news = (self.collect_news_clippings and
                            'News' in self.get_genres(books_db, book_id))

            # Sanitize text, note to unicode.
            # Text may be NULL for a note-only annotation; treat that as empty.
            highlight_text = re.sub('\xa0', ' ', row[b'Text'] or '')
            highlight_text = UnicodeDammit(highlight_text).unicode
            # Drop empty lines, then trim the remaining ones
            highlight_text = [line.strip() for line in highlight_text.split('\n') if line]

            note_text = None
            if row[b'Note']:
                note_text = UnicodeDammit(row[b'Note']).unicode.rstrip('\n')

            # If empty highlight_text and empty note_text, not a useful annotation
            if not highlight_text and not note_text:
                continue

            # Populate an AnnotationStruct
            a_mi = AnnotationStruct()
            a_mi.annotation_id = row[b'UUID']
            a_mi.book_id = book_id
            a_mi.highlight_color = self.HIGHLIGHT_COLORS[row[b'Colour']]
            a_mi.highlight_text = '\n'.join(highlight_text)
            a_mi.last_modification = row[b'NoteDateTime']

            # The TOC is keyed by the section number minus one, as a string
            section = str(int(row[b'Section']) - 1)
            try:
                a_mi.location = self.tocs[book_id][section]
            except Exception:
                # No TOC entry available: fall back to a generic label
                if this_is_news:
                    a_mi.location = self.get_title(books_db, book_id)
                else:
                    a_mi.location = "Section %s" % row[b'Section']

            a_mi.note_text = note_text

            # Generate location_sort
            if this_is_news:
                a_mi.location_sort = row[b'NoteDateTime']
            else:
                interior = self._generate_interior_location_sort(row[b'StartXPath'])
                if not interior:
                    self._log("Marvin: unable to parse xpath:")
                    self._log(row[b'StartXPath'])
                    self._log(a_mi)
                    continue

                a_mi.location_sort = "%04d.%s.%04d" % (
                    int(row[b'Section']),
                    interior,
                    int(row[b'StartOffset']))

            # Add annotation
            self.add_to_annotations_db(cached_db, a_mi)

            # Update last_annotation in books_db
            self.update_book_last_annotation(books_db, row[b'NoteDateTime'], book_id)

    # Update the timestamp
    self.update_timestamp(cached_db)
    self.commit()
def capture_content(self, uas, book_id, transient_db):
    '''
    Store a set of annotations to the transient table

    uas: iterable of parsed markup nodes; only <div class="annotation">
         tags are captured, everything else is skipped
    book_id: id recorded on every captured annotation
    transient_db: name of the transient table to create and fill
    '''
    self.create_annotations_transient_table(transient_db)
    self._log_location(book_id, uas)

    for ua in uas:
        if isinstance(ua, NavigableString):
            continue
        # .get() so that a div without a class attribute is skipped, not fatal
        if ua.name != 'div' or ua.get('class') != "annotation":
            continue

        this_ua = AnnotationStruct()
        this_ua.book_id = book_id
        this_ua.hash = ua['hash']
        this_ua.genre = ua.get('genre')
        try:
            this_ua.highlight_color = ua.find('table')['color']
        except (TypeError, KeyError):
            # No table, or table without a color attribute
            this_ua.highlight_color = 'gray'
        this_ua.reader = ua.get('reader', '')
        try:
            this_ua.last_modification = ua.find('td', 'timestamp')['uts']
        except (TypeError, KeyError):
            this_ua.last_modification = "0"
        try:
            this_ua.location = ua.find('td', 'location').string
        except AttributeError:
            # No location cell
            this_ua.location = ""
        this_ua.location_sort = ua.get('location_sort', "")

        try:
            pels = ua.findAll('p', 'highlight')
            self._log_location(book_id, "highlight pels={0}".format(pels))
            # .string is None when a paragraph holds nested markup
            this_ua.highlight_text = '\n'.join([p.string or '' for p in pels])
            self._log_location(book_id, "highlight - this_ua.highlight_text={0}".format(this_ua.highlight_text))
        except Exception:
            # Best effort: keep the annotation even without highlight text
            self._log_location(book_id, "unable to capture highlight text")

        try:
            nels = ua.findAll('p', 'note')
            self._log_location(book_id, "note nels={0}".format(nels))
            this_ua.note_text = '\n'.join([n.string or '' for n in nels])
            self._log_location(book_id, "highlight - this_ua.note_text={0}".format(this_ua.note_text))
        except Exception:
            # Best effort: keep the annotation even without note text
            self._log_location(book_id, "unable to capture note text")

        self.add_to_transient_db(transient_db, this_ua)
def capture_content(self, uas, book_id, transient_db):
    '''
    Build AnnotationStructs from a set of annotation elements

    The transient table is created, but the captured annotations are
    returned to the caller rather than written to it here.

    uas: iterable of parsed markup nodes; only div tags whose first class
         is "annotation" are captured, everything else is skipped
    book_id: id recorded on every captured annotation
    transient_db: name of the transient table to create

    Returns the list of captured AnnotationStruct objects.
    '''
    self.create_annotations_transient_table(transient_db)
    self._log_location(book_id, uas)

    annotation_list = []
    for ua in uas:
        self._log_location(book_id, ua)
        if isinstance(ua, NavigableString):
            continue
        if ua.name != 'div':
            continue
        # .get() so that a div without a class attribute is skipped, not fatal
        classes = ua.get('class')
        if not classes or classes[0] != "annotation":
            continue

        this_ua = AnnotationStruct()
        this_ua.book_id = book_id
        this_ua.hash = ua['hash']
        this_ua.genre = ua.get('genre')
        try:
            this_ua.highlight_color = ua.find('table')['color']
        except (TypeError, KeyError):
            # No table, or table without a color attribute
            this_ua.highlight_color = 'gray'
        this_ua.reader = ua.get('reader', '')
        try:
            this_ua.last_modification = ua.find('td', 'timestamp')['uts']
        except (TypeError, KeyError):
            this_ua.last_modification = "0"
        try:
            this_ua.location = ua.find('td', 'location').string
        except AttributeError:
            # No location cell
            this_ua.location = ""
        this_ua.location_sort = ua.get('location_sort', "")

        try:
            pels = ua.findAll('p', 'highlight')
            self._log_location(book_id, "highlight pels={0}".format(pels))
            # .string is None when a paragraph holds nested markup
            this_ua.highlight_text = '\n'.join([p.string or '' for p in pels])
            self._log_location(book_id, "highlight - this_ua.highlight_text={0}".format(this_ua.highlight_text))
        except Exception:
            # Best effort: keep the annotation even without highlight text
            self._log_location(book_id, "unable to capture highlight text")

        try:
            nels = ua.findAll('p', 'note')
            self._log_location(book_id, "note nels={0}".format(nels))
            this_ua.note_text = '\n'.join([n.string or '' for n in nels])
            self._log_location(book_id, "highlight - this_ua.note_text={0}".format(this_ua.note_text))
        except Exception:
            # Best effort: keep the annotation even without note text
            self._log_location(book_id, "unable to capture note text")

        self._log_location(book_id, this_ua)
        annotation_list.append(this_ua)

    return annotation_list
def parse_exported_highlights(self, raw, log_failure=True):
    """
    Extract highlights from pasted Annotations summary, add them to
    selected book in calibre library

    Expected layout of raw: title, author and publisher on the first three
    lines, then repeated groups of
        <timestamp> (Page <n>)
        <highlight text>
        Notes: <note text>        (optional)
    The first line that is not a timestamp/location line ends the list.

    raw: the pasted summary text
    log_failure: when True, a parse failure is logged and reported to the
                 user in a message box

    Construct a BookStruct object with the book's metadata.
    Starred items are minimally required.
       BookStruct properties:
        *active: [True|False]
        *author: "John Smith"
         author_sort: (if known)
        *book_id: an int uniquely identifying the book.
                  Highlights are associated with books through book_id
         genre: "Fiction" (if known)
        *title: "The Story of John Smith"
         title_sort: "Story of John Smith, The" (if known)
         uuid: Calibre's uuid for this book, if known

    Construct an AnnotationStruct object with the highlight's metadata.
    Starred items are minimally required. Dashed items
    (highlight_text and note_text) may be one or both.
      AnnotationStruct properties:
        annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
        highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
        last_modification: The timestamp of the annotation
        location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time

    Returns True if the summary was imported, False if it could not be parsed.
    """
    # Create the annotations, books table as needed
    self.annotations_db = "%s_imported_annotations" % self.app_name_
    self.create_annotations_table(self.annotations_db)
    self.books_db = "%s_imported_books" % self.app_name_
    self.create_books_table(self.books_db)

    self.annotated_book_list = []
    self.selected_books = None

    # Generate the book metadata from the selected book
    row = self.opts.gui.library_view.currentIndex()
    book_id = self.opts.gui.library_view.model().id(row)
    db = self.opts.gui.current_db
    mi = db.get_metadata(book_id, index_is_id=True)

    tsl_pattern = re.compile(r'^(?P<timestamp>.*) \((?P<location>Page .*)\)')

    try:
        lines = raw.split('\n')
        if len(lines) < 5:
            raise AnnotationsException("Invalid annotations summary")

        index = 0
        annotations = {}

        # Get the title, author from the first lines
        title = lines[index]
        index += 1
        author = lines[index]
        index += 1
        # The third line holds the publisher, which is not used
        index += 1

        # Next line should be the first timestamp/location
        while index < len(lines):
            tsl = tsl_pattern.match(lines[index])
            if not tsl:
                # Fail explicitly instead of spinning on a malformed summary
                raise AnnotationsException("Invalid annotations summary")

            isoformat = parse_date(tsl.group('timestamp'), as_utc=False)
            isoformat = isoformat.replace(hour=12)
            timestamp = mktime(isoformat.timetuple())
            # Timestamps are the dict key; nudge duplicates until unique
            while timestamp in annotations:
                timestamp += 60

            location = tsl.group('location')
            index += 1

            # Continue with highlight
            highlight_text = lines[index]
            index += 1

            # Next line is either Notes:, a new timestamp/location, or the end
            note_text = None
            if index < len(lines):
                note = re.match(r'^Notes: (?P<note_text>.*)', lines[index])
                if note:
                    note_text = note.group('note_text')
                    index += 1

            ann = AnnotationStruct()
            ann.book_id = mi.id
            ann.annotation_id = index
            ann.highlight_color = 'Yellow'
            ann.highlight_text = highlight_text
            ann.location = location
            ann.location_sort = "%05d" % int(
                re.match(r'^Page (?P<page>\d+).*$', location).group('page'))
            ann.note_text = note_text
            ann.last_modification = timestamp
            annotations[timestamp] = ann

            # Anything other than another timestamp/location ends the list
            if index >= len(lines) or not tsl_pattern.match(lines[index]):
                break

    except Exception:
        if log_failure:
            self._log(" unable to parse %s Annotations" % self.app_name)
            self._log("{:~^80}".format(" Imported Annotation summary "))
            self._log(raw)
            self._log("{:~^80}".format(" end imported Annotations summary "))
            import traceback
            traceback.print_exc()
            msg = ('Unable to parse Annotation summary from %s. ' % self.app_name +
                   'Paste entire contents of emailed summary.')
            MessageBox(MessageBox.WARNING,
                       'Error importing annotations',
                       msg,
                       show_copy_button=False,
                       parent=self.opts.gui).exec_()
            self._log_location("WARNING: %s" % msg)
        return False

    # Populate a BookStruct
    book_mi = BookStruct()
    book_mi.active = True
    book_mi.author = author
    book_mi.book_id = mi.id
    book_mi.title = title
    book_mi.uuid = None
    book_mi.last_update = time.mktime(time.localtime())
    book_mi.reader_app = self.app_name
    book_mi.cid = mi.id
    book_mi.annotations = len(annotations)

    # Add book to books_db
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # Add the annotations, oldest first
    for timestamp in sorted(annotations):
        self.add_to_annotations_db(self.annotations_db, annotations[timestamp])
        self.update_book_last_annotation(self.books_db, timestamp, mi.id)
        self.opts.pb.increment()

    # Update the timestamp
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()

    # Return True if successful
    return True
def parse_exported_highlights(self, raw, log_failure=True):
    """
    Extract highlights from pasted Annotation summary email

    The summary is walked line by line: a 'File: <title>' header, then
    '--- Page <n> ---' markers each followed by one or more annotations
    whose first line starts with one of self.ANNOTATION_TYPES. Entries
    starting with one of self.SKIP_TYPES are passed over. Parsing stops at
    the '(report generated by GoodReader)' line or at the end of the text.

    raw: the pasted summary text
    log_failure: when True, a parse failure is logged and reported to the
                 user in a message box

    Return True if no problems
    Return False if error
    """
    # Create the annotations, books table as needed
    self.annotations_db = "%s_imported_annotations" % self.app_name_
    self.create_annotations_table(self.annotations_db)
    self.books_db = "%s_imported_books" % self.app_name_
    self.create_books_table(self.books_db)

    self.annotated_book_list = []
    self.selected_books = None

    # Generate the book metadata from the selected book
    row = self.opts.gui.library_view.currentIndex()
    book_id = self.opts.gui.library_view.model().id(row)
    db = self.opts.gui.current_db
    mi = db.get_metadata(book_id, index_is_id=True)

    # Grab the title from the front of raw
    try:
        title = re.match(r'(?m)File: (?P<title>.*)$', raw).group('title')

        # Populate a BookStruct
        book_mi = BookStruct()
        book_mi.active = True
        book_mi.author = 'Unknown'
        book_mi.book_id = mi.id
        book_mi.title = title
        book_mi.uuid = None
        book_mi.last_update = time.mktime(time.localtime())
        book_mi.reader_app = self.app_name
        book_mi.cid = mi.id

        # Line-prefix matchers, built once instead of on every iteration
        any_type = re.compile('^(?P<ann_type>{0})'.format(
            '|'.join(self.ANNOTATION_TYPES + self.SKIP_TYPES)))
        wanted_type = re.compile('^(?P<ann_type>{0})'.format(
            '|'.join(self.ANNOTATION_TYPES)))
        page_marker = re.compile(r'--- (Page \w+) ---')

        gr_annotations = raw.split('\n')
        num_lines = len(gr_annotations)
        highlights = {}

        # Find the first annotation
        i = 0
        line = gr_annotations[i]
        while not line.startswith('--- Page'):
            i += 1
            line = gr_annotations[i]

        while i < num_lines and not line.startswith('(report generated by GoodReader)'):
            # Extract the page number
            page_num = page_marker.search(line)
            if page_num:
                page_num = page_num.group(1)

                # Extract the highlight
                i += 1
                line = gr_annotations[i]

                prefix = None
                while True:
                    prefix = any_type.search(line)
                    if prefix and prefix.group('ann_type') in self.SKIP_TYPES:
                        # Pass over the skipped entry up to the next wanted type
                        i += 1
                        line = gr_annotations[i]
                        while not wanted_type.search(line):
                            i += 1
                            line = gr_annotations[i]
                        continue
                    elif prefix:
                        break
                    else:
                        i += 1
                        line = gr_annotations[i]

                annotation = self._extract_highlight(line, prefix.group('ann_type'))
                annotation.page_num = page_num

                # Get the annotation(s)
                i += 1
                line = gr_annotations[i]
                ann = ''
                while i < num_lines \
                        and not line.startswith('--- Page') \
                        and not line.startswith('(report generated by GoodReader)'):

                    if line:
                        prefix = any_type.search(line)
                        if prefix and prefix.group('ann_type') in self.SKIP_TYPES:
                            # Continue until next ann_type
                            i += 1
                            line = gr_annotations[i]
                            while not wanted_type.search(line):
                                i += 1
                                if i == num_lines:
                                    break
                                line = gr_annotations[i]
                            continue
                        elif prefix:
                            # Additional highlight on the same page
                            # write current annotation, start new annotation
                            self._store_annotation(highlights, annotation)
                            annotation = self._extract_highlight(line, prefix.group('ann_type'))
                            annotation.page_num = page_num
                            annotation.ann_type = prefix.group('ann_type')
                            ann = ''
                            i += 1
                            line = gr_annotations[i]
                            continue

                        if not ann:
                            ann = line
                        else:
                            ann += '\n' + line
                    i += 1
                    line = gr_annotations[i]
                    # Refreshed every pass so that a stored annotation
                    # carries the text collected so far
                    annotation.ann = ann

                # Back up so that the next line is '--- Page' or '(report generated'
                i -= 1
                self._store_annotation(highlights, annotation)

            i += 1
            if i == num_lines:
                break
            line = gr_annotations[i]

    except Exception:
        if log_failure:
            self._log(" unable to parse GoodReader Annotation summary")
            self._log("{:~^80}".format(" Imported Annotation summary "))
            self._log(raw)
            self._log("{:~^80}".format(" end imported Annotations summary "))
            import traceback
            traceback.print_exc()
            msg = ('Unable to parse Annotation summary from %s. ' % self.app_name +
                   'Paste entire contents of emailed summary.')
            MessageBox(MessageBox.WARNING,
                       'Error importing annotations',
                       msg,
                       show_copy_button=False,
                       parent=self.opts.gui).exec_()
            self._log_location("WARNING: %s" % msg)
        return False

    # Finalize book_mi
    book_mi.annotations = len(highlights)

    # Add book to books_db
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # sorted(dict) iterates the keys on both Python 2 and Python 3
    for dt in sorted(highlights):
        entry = highlights[dt]
        highlight_text = entry['text'] if 'text' in entry else None
        note_text = entry['note'] if 'note' in entry else None

        # Populate an AnnotationStruct
        a_mi = AnnotationStruct()
        a_mi.annotation_id = dt
        a_mi.book_id = book_mi['book_id']
        a_mi.highlight_color = entry['color']
        a_mi.highlight_text = highlight_text
        a_mi.location = entry['page']
        a_mi.last_modification = dt
        a_mi.note_text = note_text

        # Location sort: roman-numeral pages go in the decimal part with a
        # whole part of 0, arabic pages in the whole part
        page_literal = re.match(r'^Page (?P<page>[0-9ivx]+).*$', a_mi.location).group('page')
        if re.match('[IXVL]', page_literal.upper()):
            whole = 0
            decimal = self._roman_to_int(page_literal)
        else:
            whole = int(page_literal)
            decimal = 0
        a_mi.location_sort = "%05d.%05d" % (whole, decimal)

        # Add annotation
        self.add_to_annotations_db(self.annotations_db, a_mi)
        self.update_book_last_annotation(self.books_db, dt, book_mi['book_id'])

    # Update the timestamp
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()

    return True
def get_active_annotations(self):
    """
    Fetch active iBooks annotations from AEAnnotation_*.sqlite

    When caching is disabled or the cached copy is stale, every non-deleted
    row with ZANNOTATIONTYPE = 2 belonging to a book in self.installed_books
    is converted to an AnnotationStruct and added to the cached annotations
    table. Otherwise only a log line is written.
    """
    # Local import: sqlite3's own context manager only scopes a transaction,
    # it does not close the connection.
    from contextlib import closing

    self._log("%s:get_active_annotations()" % self.app_name)

    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    db_profile = self._localize_database_path(self.app_id, self.annotations_subpath)
    self.annotations_db = db_profile['path']

    # Test timestamp against cached value
    cached_db = self.generate_annotations_db_name(self.app_name_, self.ios.device_name)
    books_db = self.generate_books_db_name(self.app_name_, self.ios.device_name)

    if not self.opts.disable_caching and self._cache_is_current(db_profile['stats'], cached_db):
        self._log(" retrieving cached annotations from %s" % cached_db)
        return

    self._log(" fetching annotations from %s on %s" % (self.app_name, self.ios.device_name))

    # Create the annotations table as needed
    self.create_annotations_table(cached_db)

    with closing(sqlite3.connect(self.annotations_db)) as con:
        con.row_factory = sqlite3.Row
        cur = con.cursor()
        cur.execute('''SELECT
                        ZANNOTATIONASSETID,
                        ZANNOTATIONLOCATION,
                        ZANNOTATIONMODIFICATIONDATE,
                        ZANNOTATIONNOTE,
                        ZANNOTATIONSELECTEDTEXT,
                        ZANNOTATIONSTYLE,
                        ZANNOTATIONUUID
                       FROM ZAEANNOTATION
                       WHERE ZANNOTATIONDELETED = 0 and ZANNOTATIONTYPE = 2
                       ORDER BY ZANNOTATIONMODIFICATIONDATE
                    ''')
        rows = cur.fetchall()
        self.opts.pb.set_maximum(len(rows))

        for row in rows:
            self.opts.pb.increment()
            book_id = row[b'ZANNOTATIONASSETID']
            if book_id not in self.installed_books:
                continue

            this_is_news = (self.collect_news_clippings and
                            'News' in self.get_genres(books_db, book_id))
            # The stored date is offset by NSTimeIntervalSince1970 before use
            modified = row[b'ZANNOTATIONMODIFICATIONDATE'] + self.NSTimeIntervalSince1970

            # Sanitize text, note to unicode.
            # The selected text may be NULL; treat that as empty.
            highlight_text = re.sub('\xa0', ' ', row[b'ZANNOTATIONSELECTEDTEXT'] or '')
            highlight_text = UnicodeDammit(highlight_text).unicode
            # Drop empty lines, then trim the remaining ones
            highlight_text = [line.strip() for line in highlight_text.split('\n') if line]

            note_text = None
            if row[b'ZANNOTATIONNOTE']:
                note_text = UnicodeDammit(row[b'ZANNOTATIONNOTE']).unicode
                # Only the first line of the note is kept
                note_text = note_text.rstrip('\n').split('\n')[0]

            # Populate an AnnotationStruct
            a_mi = AnnotationStruct()
            a_mi.annotation_id = row[b'ZANNOTATIONUUID']
            a_mi.book_id = book_id
            a_mi.epubcfi = row[b'ZANNOTATIONLOCATION']
            a_mi.highlight_color = self.HIGHLIGHT_COLORS[row[b'ZANNOTATIONSTYLE']]
            a_mi.highlight_text = '\n'.join(highlight_text)
            a_mi.last_modification = modified

            if a_mi.epubcfi:
                section = self._get_spine_index(a_mi.epubcfi)
                try:
                    a_mi.location = self.tocs[book_id]["%.0f" % (section - 1)]
                except Exception:
                    # No TOC entry for this section: fall back to its number
                    a_mi.location = "Section %d" % section
                if this_is_news:
                    a_mi.location_sort = a_mi.last_modification
                else:
                    a_mi.location_sort = self._generate_location_sort(a_mi.epubcfi)
            elif this_is_news:
                a_mi.location = self.get_title(books_db, book_id)
                a_mi.location_sort = a_mi.last_modification

            a_mi.note_text = note_text

            # Add annotation
            self.add_to_annotations_db(cached_db, a_mi)

            # Update last_annotation in books_db
            self.update_book_last_annotation(books_db, modified, book_id)

    self.update_timestamp(cached_db)
    self.commit()
def get_active_annotations(self):
    '''
    Collect the device's annotations and write them to the annotations table.

    self.active_annotations is reset, filled by self._parse_tolino_notes(),
    and then walked in timestamp order. Each entry becomes an
    AnnotationStruct. Starred items are minimally required. Dashed items
    (highlight_text and note_text) may be one or both.
    AnnotationStruct properties:
        annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
        highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
        last_modification: The timestamp of the annotation
        location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time
    '''
    self._log("%s:get_active_annotations()" % self.app_name)

    self.active_annotations = {}

    progress = self.opts.pb
    progress.set_label("Getting active annotations for %s" % self.app_name)
    progress.set_value(0)

    # Don't change the template of the _db strings
    self.annotations_db = self.generate_annotations_db_name(
        self.app_name_, self.opts.device_name)
    self.books_db = self.generate_books_db_name(
        self.app_name_, self.opts.device_name)

    # Create the annotations table
    self.create_annotations_table(self.annotations_db)

    # Fill self.active_annotations
    self._parse_tolino_notes()

    # Initialize the progress bar
    progress.set_label("Getting highlights from %s" % self.app_name)
    progress.set_value(0)
    progress.show()
    progress.set_maximum(len(self.active_annotations))

    # Add annotations to the database, oldest first
    for timestamp in sorted(self.active_annotations):
        entry = self.active_annotations[timestamp]

        # Required items
        ann_mi = AnnotationStruct()
        ann_mi.book_id = entry['book_id']
        ann_mi.last_modification = timestamp

        this_is_news = self.collect_news_clippings and 'News' in self.get_genres(
            self.books_db, ann_mi.book_id)

        # Optional items, copied only when the entry carries them
        for field in ('annotation_id', 'highlight_color'):
            if field in entry:
                setattr(ann_mi, field, entry[field])

        if 'highlight_text' in entry:
            ann_mi.highlight_text = '\n'.join(entry['highlight_text'])

        if this_is_news:
            # News clippings are located by title and ordered by time
            ann_mi.location = self.get_title(self.books_db, ann_mi.book_id)
            ann_mi.location_sort = timestamp
        else:
            for field in ('location', 'location_sort'):
                if field in entry:
                    setattr(ann_mi, field, entry[field])

        if 'note_text' in entry:
            ann_mi.note_text = '\n'.join(entry['note_text'])

        # Add annotation to self.annotations_db
        self.add_to_annotations_db(self.annotations_db, ann_mi)

        # Increment the progress bar
        progress.increment()

        # Update last_annotation in self.books_db
        self.update_book_last_annotation(self.books_db, timestamp, ann_mi.book_id)

    progress.hide()

    # Update the timestamp
    self.update_timestamp(self.annotations_db)
    self.commit()
def get_active_annotations(self):
    '''
    Copy every entry of self.active_annotations into the annotations table.

    self._fetch_annotations() is called before self.active_annotations is
    read; presumably that helper fills it (confirm against the helper).
    Entries are processed ordered by book_id, then location_sort, then
    last_modification, so all three keys must be present in every entry.

    Each entry is turned into an AnnotationStruct. Starred items are
    minimally required. Dashed items (highlight_text and note_text) may be
    one or both.
    AnnotationStruct properties:
        annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
        highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
        last_modification: The timestamp of the annotation
        location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time
    '''
    def _processing_order(entry):
        # Group by book, then by position in the book, then by time.
        return (entry['book_id'],
                entry['location_sort'],
                entry['last_modification'])

    # Optional fields, copied verbatim when the entry carries them.
    optional_fields = (
        'annotation_id',
        'highlight_color',
        'highlight_text',
        'note_text',
        'location',
        'location_sort',
    )

    self._log_location("Start!!!!")
    self._log("%s:get_active_annotations()" % self.app_name)

    progress = self.opts.pb
    progress.set_label("Getting active annotations for %s" % self.app_name)
    progress.set_value(0)

    annotations_db = self.generate_annotations_db_name(
        self.app_name_, self.opts.device_name)
    self.books_db = self.generate_books_db_name(
        self.app_name_, self.opts.device_name)
    self._log("%s:get_active_annotations() - annotations_db=%s, books_db=%s"
              % (self.app_name, annotations_db, self.books_db))

    # The annotations table has to exist before rows are added below.
    self.create_annotations_table(annotations_db)

    self._fetch_annotations()

    # Re-arm the progress bar for the database pass.
    progress.set_label("Getting highlights from %s" % self.app_name)
    progress.set_value(0)
    progress.set_maximum(len(self.active_annotations))

    ordered = sorted(self.active_annotations.values(), key=_processing_order)
    for annotation in ordered:
        ann_mi = AnnotationStruct()

        # Required items
        ann_mi.book_id = annotation['book_id']
        ann_mi.last_modification = annotation['last_modification']

        # Optional items
        for field in optional_fields:
            if field in annotation:
                setattr(ann_mi, field, annotation[field])

        # Store the annotation, advance the bar, and record the book's
        # most recent annotation time.
        self.add_to_annotations_db(annotations_db, ann_mi)
        progress.increment()
        self.update_book_last_annotation(
            self.books_db, ann_mi.last_modification, ann_mi.book_id)

    # Update the timestamp
    self.update_timestamp(annotations_db)
    self.commit()
    self._log_location("Finish!!!!")
def get_active_annotations(self):
    '''
    Copy every entry of self.active_annotations into the annotations table.

    self._fetch_annotations() is called before self.active_annotations is
    read; presumably that helper fills it, keyed by annotation id (confirm
    against the helper). Entries are processed in ascending key order.

    Each entry is turned into an AnnotationStruct. Starred items are
    minimally required. Dashed items (highlight_text and note_text) may be
    one or both.
    AnnotationStruct properties:
        annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
        highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
        last_modification: The timestamp of the annotation
        location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time
    '''
    self._log_location("Start!!!!")
    self._log("%s:get_active_annotations()" % self.app_name)
    self.opts.pb.set_label("Getting active annotations for %s" % self.app_name)
    self.opts.pb.set_value(0)

    annotations_db = self.generate_annotations_db_name(
        self.app_name_, self.opts.device_name)
    self.books_db = self.generate_books_db_name(
        self.app_name_, self.opts.device_name)
    self._log("%s:get_active_annotations() - annotations_db=%s, books_db=%s"
              % (self.app_name, annotations_db, self.books_db))

    # Create the annotations table
    self.create_annotations_table(annotations_db)

    self._fetch_annotations()

    # Initialize the progress bar
    self.opts.pb.set_label("Getting highlights from %s" % self.app_name)
    self.opts.pb.set_value(0)
    self.opts.pb.set_maximum(len(self.active_annotations))

    # Add annotations to the database.
    # Iterating the dict directly yields its keys on both Python 2 and 3;
    # dict.iterkeys() no longer exists on Python 3.
    for annotation_id in sorted(self.active_annotations):
        annotation = self.active_annotations[annotation_id]

        # Populate an AnnotationStruct with available data
        ann_mi = AnnotationStruct()

        # Required items
        ann_mi.book_id = annotation['book_id']
        ann_mi.last_modification = annotation['last_modification']

        # Optional items
        if 'annotation_id' in annotation:
            ann_mi.annotation_id = annotation['annotation_id']
        if 'highlight_color' in annotation:
            ann_mi.highlight_color = annotation['highlight_color']
        if 'highlight_text' in annotation:
            self._log("get_active_annotations() - self.active_annotations[annotation_id]['highlight_text']={0}".format(annotation['highlight_text']))
            ann_mi.highlight_text = annotation['highlight_text']
        if 'note_text' in annotation:
            ann_mi.note_text = annotation['note_text']
        if 'location' in annotation:
            ann_mi.location = annotation['location']
        if 'location_sort' in annotation:
            ann_mi.location_sort = annotation['location_sort']

        # Add annotation to annotations_db
        self.add_to_annotations_db(annotations_db, ann_mi)

        # Increment the progress bar
        self.opts.pb.increment()

        self._log("%s:get_active_annotations() - books_db=%s"
                  % (self.app_name, self.books_db))

        # Update last_annotation in books_db
        self.update_book_last_annotation(
            self.books_db, ann_mi.last_modification, ann_mi.book_id)

    # Update the timestamp
    self.update_timestamp(annotations_db)
    self.commit()
    self._log_location("Finish!!!!")
def parse_exported_highlights(self, raw):
    """
    Add the highlights in self.highlights to the selected calibre book.

    Creates the "<app>_imported_annotations" and "<app>_imported_books"
    tables as needed, builds a BookStruct for the book currently selected in
    the library view, stores one AnnotationStruct per entry of
    self.highlights (processed in ascending timestamp order), then stores
    the book and commits.

    NOTE(review): `raw` is not read by this method, and self.highlights is
    not populated here; presumably it is filled from `raw` elsewhere before
    this point. Confirm against the caller. The title and author are
    hard-coded placeholder values rather than taken from the book metadata.

    Construct a BookStruct object with the book's metadata.
    Starred items are minimally required.
    BookStruct properties:
        *active: [True|False]
        *author: "John Smith"
        author_sort: (if known)
        *book_id: an int uniquely identifying the book.
            Highlights are associated with books through book_id
        genre: "Fiction" (if known)
        *title: "The Story of John Smith"
        title_sort: "Story of John Smith, The" (if known)
        uuid: Calibre's uuid for this book, if known

    Construct an AnnotationStruct object with the highlight's metadata.
    Starred items are minimally required. Dashed items (highlight_text and
    note_text) may be one or both.
    AnnotationStruct properties:
        annotation_id: an int uniquely identifying the annotation
        *book_id: The book this annotation is associated with
        highlight_color: [Blue|Gray|Green|Pink|Purple|Underline|Yellow]
        -highlight_text: A list of paragraphs constituting the highlight
        last_modification: The timestamp of the annotation
        location: location of highlight in the book
        -note_text: A list of paragraphs constituting the note
        *timestamp: Unique timestamp of highlight's creation/modification time

    Returns:
        True once the annotations and the book have been stored.
    """
    self._log("%s:parse_exported_highlight()" % self.app_name)

    # Create the annotations, books table as needed
    self.annotations_db = "%s_imported_annotations" % self.app_name_
    self.create_annotations_table(self.annotations_db)
    self.books_db = "%s_imported_books" % self.app_name_
    self.create_books_table(self.books_db)

    self.annotated_book_list = []
    self.selected_books = None

    # Generate the book metadata from the selected book
    row = self.opts.gui.library_view.currentIndex()
    book_id = self.opts.gui.library_view.model().id(row)
    db = self.opts.gui.current_db
    mi = db.get_metadata(book_id, index_is_id=True)

    # Populate author, title at a minimum
    title = "A Book With Some Exported Annotations"
    author = "John Smith"

    # Populate a BookStruct
    book_mi = BookStruct()
    book_mi.active = True
    book_mi.author = author
    book_mi.book_id = mi.id
    book_mi.title = title
    book_mi.uuid = None
    book_mi.last_update = time.mktime(time.localtime())
    book_mi.reader_app = self.app_name
    book_mi.cid = mi.id
    book_mi.annotations = len(self.highlights)

    # Add annotations to the database.
    # Iterating the dict directly yields its keys on both Python 2 and 3;
    # dict.iterkeys() no longer exists on Python 3.
    for timestamp in sorted(self.highlights):
        highlight = self.highlights[timestamp]

        # Timestamps ascend, so the book ends up with the newest one.
        book_mi.last_update = timestamp

        # Populate an AnnotationStruct
        ann_mi = AnnotationStruct()

        # Required items
        ann_mi.book_id = book_mi['book_id']
        ann_mi.last_modification = timestamp

        # Optional items
        if 'annotation_id' in highlight:
            ann_mi.annotation_id = highlight['annotation_id']
        if 'highlight_color' in highlight:
            ann_mi.highlight_color = highlight['highlight_color']
        # Paragraph lists are stored as one newline-separated string.
        if 'highlight_text' in highlight:
            ann_mi.highlight_text = '\n'.join(highlight['highlight_text'])
        if 'note_text' in highlight:
            ann_mi.note_text = '\n'.join(highlight['note_text'])

        # Add annotation to annotations_db
        self.add_to_annotations_db(self.annotations_db, ann_mi)

        # Increment the progress bar
        self.opts.pb.increment()

        # Update last_annotation in books_db
        self.update_book_last_annotation(self.books_db, timestamp,
                                         ann_mi.book_id)

    # Add book to books_db
    self.add_to_books_db(self.books_db, book_mi)
    self.annotated_book_list.append(book_mi)

    # Update the timestamp
    self.update_timestamp(self.annotations_db)
    self.update_timestamp(self.books_db)
    self.commit()

    # Return True if successful
    return True