def find_or_make_source(self):
    """
    Return the handle of the 'GeoNames' source, creating it when missing.

    Existing sources are scanned first (field 2 of the raw source data is
    compared with 'GeoNames').  When none matches, a repository with two
    URLs, an attribution note and a source are built and stored in a
    single transaction, and the handle of the new source is returned.
    """
    db = self.dbstate.db
    for handle in db.get_source_handles():
        if db.get_raw_source_data(handle)[2] == 'GeoNames':
            return handle

    # Nothing found: build the repository together with its URLs.
    repository = Repository()
    repository.set_name("www.geonames.org")
    repository.set_type(RepositoryType(RepositoryType.WEBSITE))

    site_url = Url()
    site_url.set_path('http://www.geonames.org/')
    site_url.set_description(_('GeoNames web site'))
    site_url.set_type(UrlType(UrlType.WEB_HOME))
    repository.add_url(site_url)

    mail_url = Url()
    mail_url.set_path('*****@*****.**')
    mail_url.set_description(_('GeoNames author'))
    mail_url.set_type(UrlType(UrlType.EMAIL))
    repository.add_url(mail_url)

    # Attribution / licence note attached to the repository.
    attribution = StyledText(
        _('GeoNames was founded by Marc Wick. You can reach him at '))
    attribution += StyledText('*****@*****.**' + '\n')
    attribution += StyledText(
        _('GeoNames is a project of Unxos GmbH, Weingartenstrasse 8,'
          ' 8708 Männedorf, Switzerland.\nThis work is licensed under a '))
    attribution += linkst(
        _('Creative Commons Attribution 3.0 License'),
        'https://creativecommons.org/licenses/by/3.0/legalcode')
    attribution_note = Note()
    attribution_note.set_styledtext(attribution)
    attribution_note.set_type(NoteType.REPO)

    source = Source()
    source.title = 'GeoNames'
    source.author = 'Marc Wick'

    reference = RepoRef()
    reference.set_media_type(SourceMediaType(SourceMediaType.ELECTRONIC))

    with DbTxn(_("Add Souce/Repo/Note (%s)") % "GeoNames", db) as trans:
        # The note is added first; its handle is read back right after.
        db.add_note(attribution_note, trans)
        repository.add_note(attribution_note.get_handle())
        db.add_repository(repository, trans)
        reference.set_reference_handle(repository.handle)
        source.add_repo_reference(reference)
        db.add_source(source, trans)
    return source.handle
def clear_text(self):
    """
    Reset the note display: disable the navigation and edit controls,
    empty the text editor and page label, and rewind to the first note.
    """
    for control in (self.left, self.right, self.edit):
        control.set_sensitive(False)
    blank = StyledText()
    self.texteditor.set_text(blank)
    self.page.set_text('')
    self.current = 0
def clear_models(self):
    """
    Empty every model, forget the pending changes and restore the
    explanatory placeholder text in the Before/After boxes.
    """
    for store in self.models:
        store.clear()
    self.changelist, self.indx = [], []

    before_hint = StyledText(
        _('\n\n'
          'Notes selected on the left pane are shown Before cleanup in'
          ' this box.'))
    self.tb.set_text(before_hint)

    after_hint = StyledText(
        _('\n\n'
          'Notes selected on the left pane are shown After cleanup in this'
          ' box.\n'
          'If you wish to make changes, you can make them here and'
          ' use the style controls in the toolbar above.'))
    self.ta.set_text(after_hint)
def _get_styled(name, callname, placeholder=False,
                trans_text=glocale.translation.sgettext, name_format=None):
    """
    Return a StyledText object with the name formatted according to the
    parameters:

    @param callname: whether the callname should be used instead of the first
        name (CALLNAME_REPLACE), underlined within the first name
        (CALLNAME_UNDERLINE_ADD) or not used at all (CALLNAME_DONTUSE).
    @param placeholder: whether a series of underscores should be inserted as a
        placeholder if first name or surname are missing.
    @param trans_text: allow deferred translation of strings
    @type trans_text: a GrampsLocale sgettext instance
    trans_text is a defined keyword (see po/update_po.py, po/genpot.sh)
    :param name_format: optional format to control display of person's name
    :type name_format: None or int

    The name displayer's default format is temporarily replaced by
    ``name_format`` (when given) and is always restored, even if formatting
    the name raises.  The call name is only underlined when it can actually
    be located in the formatted text.
    """
    # Make a copy of the name object so we don't mess around with the real
    # data.
    n = Name(source=name)

    # Insert placeholders.
    if placeholder:
        if not n.first_name:
            n.first_name = "____________"
        if not n.surname:
            n.surname = "____________"

    if n.call:
        if callname == CALLNAME_REPLACE:
            # Replace first name with call name.
            n.first_name = n.call
        elif callname == CALLNAME_UNDERLINE_ADD:
            if n.call not in n.first_name:
                # Add call name to first name.
                # translators: used in French+Russian, ignore otherwise
                n.first_name = trans_text('"%(callname)s" (%(firstname)s)') % {
                    'callname': n.call,
                    'firstname': n.first_name}

    real_format = name_displayer.get_default_format()
    try:
        if name_format is not None:
            name_displayer.set_default_format(name_format)
        text = name_displayer.display_name(n)
    finally:
        # The displayer is shared; never leave the temporary format behind.
        name_displayer.set_default_format(real_format)

    tags = []
    # "name" in the next test is on purpose: only underline the call name
    # if it was a part of the *original* first name
    if (n.call and callname == CALLNAME_UNDERLINE_ADD
            and n.call in name.first_name):
        callpos = text.find(n.call)
        # The chosen name format may not show the first name at all; a
        # failed find() would otherwise yield the bogus range (-1, len - 1).
        if callpos != -1:
            tags = [StyledTextTag(StyledTextTagType.UNDERLINE, True,
                                  [(callpos, callpos + len(n.call))])]

    return StyledText(text, tags)
def styledtext_to_html(
    styledtext: StyledText,
    space_format: int,
    contains_html: bool = False,
    link_format: Optional[str] = None,
):
    """Return the note in HTML format.

    Adapted from DynamicWeb.

    :param styledtext: the styled note text; ``str()`` of it gives the plain
        text and ``get_tags()`` its style tags.
    :param space_format: passed to ``process_spaces`` as ``format``.
    :param contains_html: when true the text is marked up without escaping
        and appended as-is, with no paragraph splitting.
    :param link_format: when given, used to build a replacement
        ``build_link`` for the HTML backend.
    :return: the HTML as one string, or ``""`` for an empty note.
    """
    backend = HtmlBackend()
    if link_format is not None:
        backend.build_link = build_link_factory(link_format)

    text = str(styledtext)
    if not text:
        return ""

    # The rendering of an empty paragraph '<p></p>' is undefined, so blank
    # paragraphs carry a non-breaking space.  The entity is spelled out
    # because a literal ordinary space would be collapsed by HTML renderers.
    blank_para = "&nbsp;"

    s_tags = styledtext.get_tags()
    html_list = Html("div", class_="grampsstylednote")
    if contains_html:
        markuptext = backend.add_markup_from_styled(
            text, s_tags, split="\n", escape=False
        )
        html_list += markuptext
    else:
        markuptext = backend.add_markup_from_styled(text, s_tags, split="\n")
        linelist = []
        linenb = 1
        sigcount = 0
        for line in markuptext.split("\n"):
            [line, sigcount] = process_spaces(line, format=space_format)
            if sigcount == 0:
                # Blank line: close the current paragraph.
                if linenb == 1:
                    linelist.append(blank_para)
                html_list.extend(Html("p") + linelist)
                linelist = []
                linenb = 1
            else:
                if linenb > 1:
                    linelist[-1] += "<br />"
                linelist.append(line)
                linenb += 1
        if linenb > 1:
            html_list.extend(Html("p") + linelist)
        # if the last line was blank, then as well as outputting the
        # previous para, which we have just done, we also output a new
        # blank para
        if sigcount == 0:
            linelist = [blank_para]
            html_list.extend(Html("p") + linelist)
    return "\n".join(html_list)
def cleanup(self, _button):
    """
    Scan every note, run it through convert_to_styled() and sort the
    outcome into the ISSUE, CLEANED or LINK model.

    Notes that end up in one of the models are also recorded in
    self.changelist as (handle, original text, converted text).  The scan
    stops early when the progress meter reports a cancel.
    """
    self.clear_models()

    # patch in slice func
    StyledText.__getitem__ = MyStyled.__getitem__

    meter = ProgressMeter(self.window_name, can_cancel=True,
                          parent=self.window)
    note_count = self.db.get_number_of_notes()
    meter.set_pass(_('Scanning Notes'), note_count)

    for handle in self.db.get_note_handles():
        note = self.db.get_note_from_handle(handle)
        g_id = note.gramps_id
        stext = note.get_styledtext()
        # Convert a copy so the note's own text object stays untouched.
        result = self.convert_to_styled(
            StyledText(stext._string, stext._tags))
        indx = len(self.changelist)

        # A yellow highlight marks a problem and wins over everything else;
        # otherwise remember whether at least one link tag was seen.
        optype = -1
        for tag in result.tags:
            tag_kind = int(tag.name)
            if (tag_kind == StyledTextTagType.HIGHLIGHT
                    and '#FFFF00' == tag.value):
                optype = ISSUE
                break
            if tag_kind == StyledTextTagType.LINK:
                optype = LINK

        if optype == ISSUE:
            # notes with errors
            bucket = ISSUE
        elif stext._string != result._string:
            # edited notes
            bucket = CLEANED
        elif optype == LINK:
            # notes with only links
            bucket = LINK
        else:
            bucket = None

        if bucket is not None:
            self.models[bucket].append((self.preview(stext, g_id), indx))
            self.changelist.append((handle, stext, result))

        meter.step()
        if meter.get_cancelled():
            break

    self.show_tabs()
    meter.close()
def _get_styled(name, callname, placeholder=False, name_format=None):
    """
    Return a StyledText object with the name formatted according to the
    parameters:

    @param callname: whether the callname should be used instead of the first
        name (CALLNAME_REPLACE), underlined within the first name
        (CALLNAME_UNDERLINE_ADD) or not used at all (CALLNAME_DONTUSE).
    @param placeholder: whether a series of underscores should be inserted as a
        placeholder if first name or surname are missing.
    @param name_format: when not None, temporarily installed as the name
        displayer's default format while the name is rendered.

    The displayer's previous default format is always restored, even if
    formatting the name raises.  The call name is only underlined when it
    can actually be located in the formatted text.
    """
    # Make a copy of the name object so we don't mess around with the real
    # data.
    n = Name(source=name)

    # Insert placeholders.
    if placeholder:
        if not n.first_name:
            n.first_name = "____________"
        if not n.surname:
            n.surname = "____________"

    if n.call:
        if callname == CALLNAME_REPLACE:
            # Replace first name with call name.
            n.first_name = n.call
        elif callname == CALLNAME_UNDERLINE_ADD:
            if n.call not in n.first_name:
                # Add call name to first name.
                n.first_name = "\"%(call)s\" (%(first)s)" % {
                    'call': n.call,
                    'first': n.first_name}

    real_format = name_displayer.get_default_format()
    try:
        if name_format is not None:
            name_displayer.set_default_format(name_format)
        text = name_displayer.display_name(n)
    finally:
        # The displayer is shared; never leave the temporary format behind.
        name_displayer.set_default_format(real_format)

    tags = []
    # "name" in the next test is on purpose: only underline the call name
    # if it was a part of the *original* first name
    if (n.call and callname == CALLNAME_UNDERLINE_ADD
            and n.call in name.first_name):
        callpos = text.find(n.call)
        # The chosen name format may not show the first name at all; a
        # failed find() would otherwise yield the bogus range (-1, len - 1).
        if callpos != -1:
            tags = [
                StyledTextTag(StyledTextTagType.UNDERLINE, True,
                              [(callpos, callpos + len(n.call))])
            ]

    return StyledText(text, tags)
def create_note(self, place, data, trans):
    """
    Store `data` as a monospaced custom "Place titles" note and attach
    the new note to `place`.

    The note is added to self.db within the transaction `trans`.
    """
    whole_text = (0, len(data))
    monospace = StyledTextTag(StyledTextTagType.FONTFACE, 'Monospace',
                              [whole_text])

    note = Note()
    note.set_styledtext(StyledText(data, [monospace]))

    titles_type = NoteType()
    titles_type.set((NoteType.CUSTOM, _("Place titles")))
    note.set_type(titles_type)

    note_handle = self.db.add_note(note, trans)
    place.add_note(note_handle)
def main(self):  # return false finishes
    """
    Refresh the gramplet for the currently active person.

    Does nothing while there are unsaved edits (self._dirty).  Otherwise
    the edit widgets are reset, the active person's name is shown in
    italics, the first referenced note whose type equals _("Person Note")
    is loaded into the editor (a fresh Note when none exists), and the
    family button is enabled for the person's first own family or, failing
    that, first parent family.  With no active person the entry fields are
    cleared and the buttons stay hidden.
    """
    # Local import so this method stays self-contained.
    from xml.sax.saxutils import escape

    if self._dirty:
        return
    self.active_person_edit.hide()
    self.active_family_edit.hide()
    self.active_family_label.hide()
    self.note_buffer.set_text(StyledText())
    active_person = self.get_active_object("Person")
    self._dirty_person = active_person
    self._dirty_family = None
    if active_person:
        self.active_person_edit.show()
        self.active_family_edit.hide()
        self.active_family_label.hide()
        # Fill in current person edits:
        name = name_displayer.display(active_person)
        # The widget parses its text as markup, so the name must be
        # escaped: a raw '&' or '<' in a name would make the markup invalid.
        self.active_person_widget.set_text("<i>%s</i> " % escape(name))
        self.active_person_widget.set_use_markup(True)
        # Note:
        self.note = None
        note_list = active_person.get_referenced_note_handles()
        for (_classname, note_handle) in note_list:
            note_obj = self.dbstate.db.get_note_from_handle(note_handle)
            if note_obj.get_type() == _("Person Note"):
                self.note = note_obj
                break
        if self.note is None:
            self.note = Note()
        self.texteditor.set_text(self.note.get_styledtext())
        self.flow_changed(self.note.get_format())
        # Family button: prefer the person's own families and only fall
        # back to the parent families when there are none.
        family_list = (active_person.get_family_handle_list()
                       or active_person.get_parent_family_handle_list())
        if family_list:
            self._dirty_family = self.dbstate.db.get_family_from_handle(
                family_list[0])
            self.active_family_edit.show()
            self.active_family_label.show()
    else:
        self.clear_data_entry(None)
        self.active_person_edit.hide()
        self.active_family_edit.hide()
        self.active_family_label.hide()
    self._dirty = False
def clear_models(self):
    """
    Remove one notebook page per model, reset all bookkeeping lists,
    restore the placeholder text in the Before/After boxes and recreate
    one tab per title.
    """
    # One page is dropped from the end of the notebook for each model.
    for _model in self.models:
        self.notebook.remove_page(-1)

    self.models = []
    self.changelist = []
    self.indx = []
    self.views = []

    before_hint = StyledText(
        _('\n\nNotes selected on the left pane are shown Before cleanup in'
          ' this box.'))
    self.tb.set_text(before_hint)

    after_hint = StyledText(
        _('\n\n'
          'Notes selected on the left pane are shown After cleanup in this'
          ' box.\n'
          'If you wish to make changes, you can make them here and'
          ' use the style controls in the toolbar above.'))
    self.ta.set_text(after_hint)

    for tab_title in self.titles:
        self.create_tab(tab_title)
def get_text(self, start=None, end=None, include_hidden_chars=True):
    """
    Return the buffer content as a StyledText.

    `start` and `end` default to the buffer's start and end iterators.
    Tags whose name starts with 'link' become link tags carrying the
    Gtk tag's `data`; other tag names are parsed as "<style> [value]" and
    kept only when the style is in ALLOWED_STYLES.  Names that raise
    ValueError while being parsed are skipped with a debug message.

    .. note:: ``s_`` prefix means StyledText*, while ``g_`` prefix means
              Gtk.*.
    """
    first = self.get_start_iter() if start is None else start
    last = self.get_end_iter() if end is None else end

    raw = super(StyledTextBuffer, self).get_text(first, last,
                                                 include_hidden_chars)
    txt = str(raw)

    # extract tags out of the buffer
    s_tags = []
    for g_tagname, g_ranges in self._get_tag_from_range().items():
        if g_tagname.startswith('link'):
            g_tag = self.get_tag_table().lookup(g_tagname)
            # Each range end is shifted by one.
            spans = [(lo, hi + 1) for (lo, hi) in g_ranges]
            s_tags.append(StyledTextTag(_('Link'), g_tag.data, spans))
            continue

        parts = g_tagname.split(' ', 1)
        try:
            style = int(parts[0])
            if len(parts) == 1:
                s_value = None
            else:
                converter = StyledTextTagType.STYLE_TYPE[style]
                s_value = converter(parts[1])

            if style in ALLOWED_STYLES:
                spans = [(lo, hi + 1) for (lo, hi) in g_ranges]
                s_tags.append(StyledTextTag(style, s_value, spans))
        except ValueError:
            _LOG.debug("silently skipping Gtk.TextTag '%s'" % g_tagname)

    return StyledText(txt, s_tags)
def get_notes(self, obj):
    """
    Load the note list of `obj` and show its first note.

    Navigation is disabled and the display emptied first; the "next"
    control is re-enabled only when more than one note exists.
    """
    for control in (self.left, self.right):
        control.set_sensitive(False)
    self.texteditor.set_text(StyledText())
    self.note_list = obj.get_note_list()
    self.page.set_text('')

    if len(self.note_list) == 0:
        self.set_has_data(False)
        return

    self.set_has_data(True)
    if len(self.note_list) > 1:
        self.right.set_sensitive(True)
    self.current = 0
    self.display_note()
def get_notes(self):
    """
    Load all the To Do notes and show the first one.

    Navigation and edit controls are disabled and the display emptied
    first; editing is re-enabled when at least one note exists and the
    "next" control when there is more than one.
    """
    for control in (self.left, self.right, self.edit):
        control.set_sensitive(False)
    self.texteditor.set_text(StyledText())
    self.note_list = self.get_note_list()
    for label in (self.page, self.title):
        label.set_text('')

    if len(self.note_list) == 0:
        self.set_has_data(False)
        return

    self.set_has_data(True)
    self.edit.set_sensitive(True)
    if len(self.note_list) > 1:
        self.right.set_sensitive(True)
    self.current = 0
    self.display_note()
def clear_data_entry(self, obj):
    """
    Empty the note buffer and report the flow setting as off.

    `obj` is accepted but not used.
    """
    blank = StyledText()
    self.note_buffer.set_text(blank)
    self.flow_changed(False)
def find_records(db, filter, top_size, callname,
                 trans_text=glocale.translation.sgettext, name_format=None,
                 living_mode=LivingProxyDb.MODE_INCLUDE_ALL):
    """
    Collect person and family "records" (youngest/oldest, shortest/longest,
    ...) from the database.

    Every candidate is handed to ``_record`` together with the pair of
    result lists it competes for; the lists are returned keyed by the local
    variable names listed in ``RECORDS``.

    :param db: database the persons, families and events are read from
    :param filter: optional filter; when truthy its ``apply`` restricts the
                   person handles and is used to test each family's parents
    :param top_size: passed through to ``_record``
    :param callname: passed through to ``_get_styled_primary_name``
    @param trans_text: allow deferred translation of strings
    @type trans_text: a GrampsLocale sgettext instance
    trans_text is a defined keyword (see po/update_po.py, po/genpot.sh)
    :param name_format: optional format to control display of person's name
    :type name_format: None or int
    :param living_mode: enable optional control of living people's records
    :type living_mode: int
    :returns: a list of ``(translated text, variable name, record list)``
              tuples, one per entry of ``RECORDS``
    """

    def get_unfiltered_person_from_handle(person_handle):
        # probably_alive() below is always evaluated on this person object
        if living_mode == LivingProxyDb.MODE_INCLUDE_ALL:
            return db.get_person_from_handle(person_handle)
        else:  # we are in the proxy so get the person before proxy changes
            return db.get_unfiltered_person(person_handle)

    today = datetime.date.today()
    today_date = Date(today.year, today.month, today.day)

    # Person records
    # NOTE: the names of these lists are looked up through locals() at the
    # end of the function, so they must not be renamed independently of
    # RECORDS.
    person_youngestliving = []
    person_oldestliving = []
    person_youngestdied = []
    person_oldestdied = []
    person_youngestmarried = []
    person_oldestmarried = []
    person_youngestdivorced = []
    person_oldestdivorced = []
    person_youngestfather = []
    person_youngestmother = []
    person_oldestfather = []
    person_oldestmother = []

    person_handle_list = db.iter_person_handles()

    if filter:
        person_handle_list = filter.apply(db, person_handle_list)

    for person_handle in person_handle_list:
        person = db.get_person_from_handle(person_handle)
        unfil_person = get_unfiltered_person_from_handle(person_handle)
        if person is None:
            continue

        # FIXME this should check for a "fallback" birth also/instead
        birth_ref = person.get_birth_ref()

        if not birth_ref:
            # No birth event, so we can't calculate any age.
            continue

        birth = db.get_event_from_handle(birth_ref.ref)
        birth_date = birth.get_date_object()

        death_date = _find_death_date(db, person)

        if not _good_date(birth_date):
            # Birth date unknown or incomplete, so we can't calculate any age.
            continue

        name = _get_styled_primary_name(person, callname,
                                        trans_text=trans_text,
                                        name_format=name_format)

        if death_date is None:
            if probably_alive(unfil_person, db):
                # Still living, look for age records
                _record(person_youngestliving, person_oldestliving,
                        today_date - birth_date, name, 'Person', person_handle,
                        top_size)
        elif _good_date(death_date):
            # Already died, look for age records
            _record(person_youngestdied, person_oldestdied,
                    death_date - birth_date, name, 'Person', person_handle,
                    top_size)

        for family_handle in person.get_family_handle_list():
            family = db.get_family_from_handle(family_handle)

            # When several matching events exist, the last one in the
            # event list wins.
            marriage_date = None
            divorce_date = None
            for event_ref in family.get_event_ref_list():
                event = db.get_event_from_handle(event_ref.ref)
                if (event.get_type().is_marriage() and
                        (event_ref.get_role().is_family() or
                         event_ref.get_role().is_primary())):
                    marriage_date = event.get_date_object()
                elif (event.get_type().is_divorce() and
                      (event_ref.get_role().is_family() or
                       event_ref.get_role().is_primary())):
                    divorce_date = event.get_date_object()

            if _good_date(marriage_date):
                _record(person_youngestmarried, person_oldestmarried,
                        marriage_date - birth_date, name, 'Person',
                        person_handle, top_size)

            if _good_date(divorce_date):
                _record(person_youngestdivorced, person_oldestdivorced,
                        divorce_date - birth_date, name, 'Person',
                        person_handle, top_size)

            for child_ref in family.get_child_ref_list():
                # Only children related by birth to this parent count.
                if person.get_gender() == person.MALE:
                    relation = child_ref.get_father_relation()
                elif person.get_gender() == person.FEMALE:
                    relation = child_ref.get_mother_relation()
                else:
                    continue
                if relation != ChildRefType.BIRTH:
                    continue

                # NOTE(review): unlike `person` above, `child` is not
                # checked for None before use - confirm the database can
                # never return None here.
                child = db.get_person_from_handle(child_ref.ref)

                # FIXME this should check for a "fallback" birth also/instead
                child_birth_ref = child.get_birth_ref()
                if not child_birth_ref:
                    continue

                child_birth = db.get_event_from_handle(child_birth_ref.ref)
                child_birth_date = child_birth.get_date_object()

                if not _good_date(child_birth_date):
                    continue

                if person.get_gender() == person.MALE:
                    _record(person_youngestfather, person_oldestfather,
                            child_birth_date - birth_date, name, 'Person',
                            person_handle, top_size)
                elif person.get_gender() == person.FEMALE:
                    _record(person_youngestmother, person_oldestmother,
                            child_birth_date - birth_date, name, 'Person',
                            person_handle, top_size)

    # Family records
    # (these names are also resolved through locals() below)
    family_mostchildren = []
    family_youngestmarried = []
    family_oldestmarried = []
    family_shortest = []
    family_longest = []
    family_smallestagediff = []
    family_biggestagediff = []

    for family in db.iter_families():
        #family = db.get_family_from_handle(family_handle)
        if living_mode != LivingProxyDb.MODE_INCLUDE_ALL:
            # FIXME no iter_families method in LivingProxyDb so do it this way
            family = db.get_family_from_handle(family.get_handle())

        # Families lacking either parent are skipped entirely.
        father_handle = family.get_father_handle()
        if not father_handle:
            continue
        mother_handle = family.get_mother_handle()
        if not mother_handle:
            continue

        # Test if either father or mother are in filter
        if filter:
            if not filter.apply(db, [father_handle, mother_handle]):
                continue

        father = db.get_person_from_handle(father_handle)
        unfil_father = get_unfiltered_person_from_handle(father_handle)
        if father is None:
            continue
        mother = db.get_person_from_handle(mother_handle)
        unfil_mother = get_unfiltered_person_from_handle(mother_handle)
        if mother is None:
            continue

        name = StyledText(trans_text("%(father)s and %(mother)s")) % {
            'father': _get_styled_primary_name(father, callname,
                                               trans_text=trans_text,
                                               name_format=name_format),
            'mother': _get_styled_primary_name(mother, callname,
                                               trans_text=trans_text,
                                               name_format=name_format)}

        # Outside MODE_INCLUDE_ALL the children count is only recorded when
        # neither parent is probably alive.
        if (living_mode == LivingProxyDb.MODE_INCLUDE_ALL
                or (not probably_alive(unfil_father, db) and
                    not probably_alive(unfil_mother, db))):
            _record(None, family_mostchildren,
                    len(family.get_child_ref_list()),
                    name, 'Family', family.handle, top_size)

        father_birth_ref = father.get_birth_ref()
        if father_birth_ref:
            father_birth_date = db.get_event_from_handle(
                father_birth_ref.ref).get_date_object()
        else:
            father_birth_date = None

        mother_birth_ref = mother.get_birth_ref()
        if mother_birth_ref:
            mother_birth_date = db.get_event_from_handle(
                mother_birth_ref.ref).get_date_object()
        else:
            mother_birth_date = None

        if _good_date(father_birth_date) and _good_date(mother_birth_date):
            # NOTE(review): `>>` is an operator overloaded by the Date
            # objects; it is used here to pick which date is subtracted from
            # which - confirm its exact meaning against the Date class.
            if father_birth_date >> mother_birth_date:
                _record(family_smallestagediff, family_biggestagediff,
                        father_birth_date - mother_birth_date,
                        name, 'Family', family.handle, top_size)
            elif mother_birth_date >> father_birth_date:
                _record(family_smallestagediff, family_biggestagediff,
                        mother_birth_date - father_birth_date,
                        name, 'Family', family.handle, top_size)

        marriage_date = None
        divorce = None
        divorce_date = None
        for event_ref in family.get_event_ref_list():
            event = db.get_event_from_handle(event_ref.ref)
            if (event.get_type().is_marriage() and
                    (event_ref.get_role().is_family() or
                     event_ref.get_role().is_primary())):
                marriage_date = event.get_date_object()
            if (event and event.get_type().is_divorce() and
                    (event_ref.get_role().is_family() or
                     event_ref.get_role().is_primary())):
                divorce = event
                divorce_date = event.get_date_object()
        father_death_date = _find_death_date(db, father)
        mother_death_date = _find_death_date(db, mother)

        if not _good_date(marriage_date):
            # Not married or marriage date unknown
            continue

        if divorce is not None and not _good_date(divorce_date):
            # Divorced but date unknown or inexact
            continue

        if (not probably_alive(unfil_father, db)
                and not _good_date(father_death_date)):
            # Father died but death date unknown or inexact
            continue

        if (not probably_alive(unfil_mother, db)
                and not _good_date(mother_death_date)):
            # Mother died but death date unknown or inexact
            continue

        if (divorce_date is None
                and father_death_date is None
                and mother_death_date is None):
            # Still married and alive
            if (probably_alive(unfil_father, db)
                    and probably_alive(unfil_mother, db)):
                _record(family_youngestmarried, family_oldestmarried,
                        today_date - marriage_date,
                        name, 'Family', family.handle, top_size)
        elif (_good_date(divorce_date) or
              _good_date(father_death_date) or
              _good_date(mother_death_date)):
            # The marriage ended at the earliest good date among both
            # deaths and the divorce.
            end = None
            if _good_date(father_death_date) and _good_date(mother_death_date):
                end = min(father_death_date, mother_death_date)
            elif _good_date(father_death_date):
                end = father_death_date
            elif _good_date(mother_death_date):
                end = mother_death_date
            if _good_date(divorce_date):
                if end:
                    end = min(end, divorce_date)
                else:
                    end = divorce_date
            duration = end - marriage_date

            _record(family_shortest, family_longest,
                    duration, name, 'Family', family.handle, top_size)

    #python 3 workaround: assign locals to tmp so we work with runtime version
    # Each RECORDS entry names one of the result lists defined above; the
    # list is fetched by that name from the snapshot of the local variables.
    tmp = locals()
    return [(trans_text(text), varname, tmp[varname])
            for (text, varname, default) in RECORDS]
def convert_to_styled(self, data):
    """
    This scans incoming notes for possible html.  It converts a select few
    tags into StyledText and removes the rest of the tags.  Notes of this
    type occur in data from FTM and ancestry.com.  Result is a much
    cleaner note.

    The work is done in two passes.  The first decodes HTML character
    references; the second walks the tokens matched by ``self.tok_regex``
    and builds the cleaned text plus the italic, bold, underline and link
    tag ranges.  Tags that are not understood are kept in the text and
    highlighted with '#FFFF00' so the caller can flag the note.

    @param data: styled text possibly containing html; its ``_string`` is
                 read (and rewritten during the first pass) and it is
                 sliced with ``data[a:b]``
    @type data: StyledText
    @return: a new StyledText holding the cleaned text, with the tags of
             the input merged with the newly created ones
    """
    # Text written in place of a table cell break.  Offsets are advanced
    # by its real length so every later tag range stays aligned with the
    # text actually emitted.
    cell_sep = ': '

    # Pass 1: decode character references ("&lt;" etc.) while keeping the
    # existing style tags.  NOTE(review): html._charref and
    # html._replace_charref are private helpers of the stdlib html module
    # and may change between Python versions.
    prev = 0
    chunks = []
    for mo in re.finditer(html._charref, data._string):
        out = html._replace_charref(mo)
        in_start = mo.start()
        in_end = mo.end()
        # Overwrite the start of the reference with its replacement so the
        # string keeps its length and the match offsets stay valid; the
        # leftover tail of the reference is skipped by the slicing below.
        data._string = (data._string[:in_start] + out +
                        data._string[(in_start + len(out)):])
        if prev != in_start + len(out):
            chunks.append(data[prev:(in_start + len(out))])
        prev = in_end
    chunks.append(data[prev:])
    data = StyledText().join(chunks)

    # Pass 2: tokenize the html.  `prev` is the input offset consumed so
    # far, `chunkpos` the length of the output produced so far.
    prev = 0
    chunkpos = 0
    chunks = []
    italics = []
    bolds = []
    unders = []
    links = []
    reds = []
    bldpos = -1
    for mo in re.finditer(self.tok_regex, data._string,
                          flags=(re.DOTALL | re.I)):
        kind = mo.lastgroup
        st_txt = mo.group(kind)
        in_start = mo.start()
        in_end = mo.end()
        if kind == 'SKIP' or kind == 'TABLE':
            if prev != in_start:
                chunks.append(data[prev:in_start])
                chunkpos += (in_start - prev)
        elif kind == 'PARAEND':
            chunks.append(data[prev:in_start] + '\n')
            chunkpos += (in_start - prev + 1)
        elif kind == 'ITALIC':
            # Drop the 3-character opening tag, keep the styled text.
            chunks.append(data[prev:in_start] +
                          data[(in_start + 3):in_end])
            newpos = chunkpos - prev + in_end - 3
            italics.append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
        elif kind == 'BOLD':
            chunks.append(data[prev:in_start] +
                          data[(in_start + 3):in_end])
            newpos = chunkpos - prev + in_end - 3
            bolds.append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
        elif kind == 'UNDER':
            chunks.append(data[prev:in_start] +
                          data[(in_start + 3):in_end])
            newpos = chunkpos - prev + in_end - 3
            unders.append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
        elif kind == 'HTTP':  # HTTP found
            st_txt = mo.group('HTTP')
            oldpos = chunkpos + in_start - prev
            chunks.append(data[prev:in_start] + st_txt)
            chunkpos += (in_start - prev + len(st_txt))
            # Trailing punctuation stays in the text but not in the link.
            st_txt = st_txt.rstrip(' .:)')
            newpos = oldpos + len(st_txt)
            links.append((st_txt, oldpos, newpos))
        elif kind == 'HREF':  # HREF found
            st_txt = mo.group('HREFT')
            lk_txt = mo.group('HREFL')
            # fix up relative links emitted by ancestry.com
            if (lk_txt.startswith("/search/dbextra") or
                    lk_txt.startswith("/handler/domain")):
                lk_txt = "http://search.ancestry.com" + lk_txt
            oldpos = chunkpos + in_start - prev
            # if tag (minus any trailing '.') is substring of link
            if st_txt[0:-1] in lk_txt:
                st_txt = lk_txt  # just use the link
            else:  # use link and tag
                st_txt = " " + lk_txt + " (" + st_txt + ")"
            newpos = oldpos + len(st_txt)
            chunks.append(data[prev:in_start] + st_txt)
            chunkpos += (in_start - prev + len(st_txt))
            links.append((lk_txt, oldpos, newpos))
        elif kind == 'TBLCELL' or kind == 'TBLHDRC':  # Table cell break
            chunks.append(data[prev:in_start] + cell_sep)
            chunkpos += (in_start - prev + len(cell_sep))
        elif kind == 'TBLHDRB':  # header start
            if prev != in_start:
                chunks.append(data[prev:in_start])
                chunkpos += (in_start - prev)
            bldpos = chunkpos
        elif kind == 'TBLHDRE':  # Header end
            if bldpos == -1:
                # No matching header start: keep the stray tag and
                # highlight it, exactly as unknown tags are handled, even
                # when it directly follows the previous token.
                chunks.append(data[prev:in_end])
                newpos = chunkpos - prev + in_end
                reds.append((chunkpos + in_start - prev, newpos))
                chunkpos = newpos
                print('Invalid table header, no start tag found')
            else:
                if prev != in_start:
                    chunks.append(data[prev:in_start])
                    chunkpos += (in_start - prev)
                bolds.append((bldpos, chunkpos))
                bldpos = -1
        elif kind == 'UNKNWN':
            chunks.append(data[prev:in_end])
            newpos = chunkpos - prev + in_end
            reds.append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
            print('Unexpected or unimplemented HTML tag', st_txt)
        else:
            print("shouldn't get here")

        prev = in_end
    chunks.append(data[prev:])

    result = StyledText().join(chunks)
    tags = []
    for link in links:
        tags.append(
            StyledTextTag(StyledTextTagType.LINK, link[0],
                          [(link[1], link[2])]))
    if italics:
        tags.append(StyledTextTag(StyledTextTagType.ITALIC, False, italics))
    if bolds:
        tags.append(StyledTextTag(StyledTextTagType.BOLD, False, bolds))
    if unders:
        tags.append(
            StyledTextTag(StyledTextTagType.UNDERLINE, False, unders))
    if reds:
        tags.append(
            StyledTextTag(StyledTextTagType.HIGHLIGHT, '#FFFF00', reds))
    return StyledText(result._string, tag_merge(result._tags, tags))
def convert_to_styled(self, data):
    """
    This scans incoming notes for possible html.  It converts a select few
    tags into StyledText and removes the rest of the tags.  Notes of this
    type occur in data from FTM and ancestry.com.  Result is a much
    cleaner note.

    HTML character references are decoded first; the text is then split
    into tokens and rebuilt without the markup while the italic, bold,
    underline and link ranges are collected.  Tags that are not understood
    are kept in the text and highlighted with '#FFFF00'.

    @param data: a string of text possibly containing html
    @type data: str
    @return: a StyledText with the cleaned text and the collected tags
    """
    token_specification = [
        # Italics: must not be nested, any tag terminates
        ('ITALIC', r'<i>.*?(?=<)'),
        # bolds: must not be nested, any tag terminates
        ('BOLD', r'<b>.*?(?=<)'),
        # Underlines: must not be nested, any tag terminates
        ('UNDER', r'<u>.*?(?=<)'),
        # Table Header Begin (start Bold)
        ('TBLHDRB', r'<tr><th>'),
        # Table Header End (end Bold)
        ('TBLHDRE', r'</th></tr>'),
        # Table Header Cell (repl with the cell separator).  The optional
        # prefix must be the closing tag '</th>'; written as '<\th>' the
        # regex engine reads '\t' as a TAB and '</th><th>' never matches.
        ('TBLHDRC', r'(</th>)?<th>'),
        # Table Cell break (repl with the cell separator)
        ('TBLCELL', r'</td><td>'),
        # Table
        ('TABLE', r'</?table.*?>'),
        # Href start to end
        ('HREF', r'<+a .*?href=["\' ]*(?P<HREFL>.*?)'
                 r'["\' ].*?>(?P<HREFT>.*?)</a>+'),
        # HTTP start to end (trailing ' .:)' is stripped from the link)
        ('HTTP', r'https?:.*?(\s|$)'),
        # Paragraph end
        ('PARAEND', r'</p>|</li>|<tr>|<br>'),
        # Skip over these tags
        ('SKIP', r'<ul>|</ul>|<li>|<p>|</tr>|<td>|</td>|<th>|'
                 r'</a>|</i>|</b>|</u>'),
        # Unimplemented HTML tags
        ('UNKNWN', r'<.*?>'),
    ]
    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)

    # Text written in place of a table cell break.  Offsets are advanced
    # by its real length so every later tag range stays aligned with the
    # text actually emitted.
    cell_sep = ': '

    # `prev` is the input offset consumed so far, `chunkpos` the length of
    # the output produced so far.
    prev = 0
    chunkpos = 0
    chunks = []
    italics = []
    bolds = []
    unders = []
    links = []
    reds = []
    bldpos = -1
    data = html.unescape(data)  # clean up escaped html "&lt;" etc.
    for mo in re.finditer(tok_regex, data, flags=(re.DOTALL | re.I)):
        kind = mo.lastgroup
        st_txt = mo.group(kind)
        in_start = mo.start()
        in_end = mo.end()
        if kind == 'SKIP' or kind == 'TABLE':
            if prev != in_start:
                chunks.append(data[prev:in_start])
                chunkpos += (in_start - prev)
        elif kind == 'PARAEND':
            chunks.append(data[prev:in_start] + '\n')
            chunkpos += (in_start - prev + 1)
        elif kind == 'ITALIC':
            # Drop the 3-character opening tag, keep the styled text.
            chunks.append(data[prev:in_start] +
                          data[(in_start + 3):in_end])
            newpos = chunkpos - prev + in_end - 3
            italics.append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
        elif kind == 'BOLD':
            chunks.append(data[prev:in_start] +
                          data[(in_start + 3):in_end])
            newpos = chunkpos - prev + in_end - 3
            bolds.append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
        elif kind == 'UNDER':
            chunks.append(data[prev:in_start] +
                          data[(in_start + 3):in_end])
            newpos = chunkpos - prev + in_end - 3
            unders.append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
        elif kind == 'HTTP':  # HTTP found
            st_txt = mo.group('HTTP')
            oldpos = chunkpos + in_start - prev
            chunks.append(data[prev:in_start] + st_txt)
            chunkpos += (in_start - prev + len(st_txt))
            # Trailing punctuation stays in the text but not in the link.
            st_txt = st_txt.rstrip(' .:)')
            newpos = oldpos + len(st_txt)
            links.append((st_txt, oldpos, newpos))
        elif kind == 'HREF':  # HREF found
            st_txt = mo.group('HREFT')
            lk_txt = mo.group('HREFL')
            # fix up relative links emitted by ancestry.com
            if (lk_txt.startswith("/search/dbextra") or
                    lk_txt.startswith("/handler/domain")):
                lk_txt = "http://search.ancestry.com" + lk_txt
            oldpos = chunkpos + in_start - prev
            # if tag (minus any trailing '.') is substring of link
            if st_txt[0:-1] in lk_txt:
                st_txt = lk_txt  # just use the link
            else:  # use link and tag
                st_txt = " " + lk_txt + " (" + st_txt + ")"
            newpos = oldpos + len(st_txt)
            chunks.append(data[prev:in_start] + st_txt)
            chunkpos += (in_start - prev + len(st_txt))
            links.append((lk_txt, oldpos, newpos))
        elif kind == 'TBLCELL' or kind == 'TBLHDRC':  # Table cell break
            chunks.append(data[prev:in_start] + cell_sep)
            chunkpos += (in_start - prev + len(cell_sep))
        elif kind == 'TBLHDRB':  # header start
            if prev != in_start:
                chunks.append(data[prev:in_start])
                chunkpos += (in_start - prev)
            bldpos = chunkpos
        elif kind == 'TBLHDRE':  # Header end
            if bldpos == -1:
                # No matching header start: keep the stray tag and
                # highlight it, even when it directly follows the previous
                # token, so the problem stays visible in the note.
                chunks.append(data[prev:in_end])
                newpos = chunkpos - prev + in_end
                reds.append((chunkpos + in_start - prev, newpos))
                chunkpos = newpos
                print('Invalid table header, no start tag found')
            else:
                if prev != in_start:
                    chunks.append(data[prev:in_start])
                    chunkpos += (in_start - prev)
                bolds.append((bldpos, chunkpos))
                bldpos = -1
        elif kind == 'UNKNWN':
            # Always keep and highlight the unknown tag; guarding this on
            # `prev != in_start` lost tags that directly follow the
            # previous token.
            chunks.append(data[prev:in_end])
            newpos = chunkpos - prev + in_end
            reds.append((chunkpos + in_start - prev, newpos))
            chunkpos = newpos
            print('Unexpected or unimplemented HTML tag', st_txt)
        else:
            print("shouldn't get here")

        prev = in_end
    chunks.append(data[prev:])

    result = ''.join(chunks)
    tags = []
    for link in links:
        tags.append(StyledTextTag(StyledTextTagType.LINK, link[0],
                                  [(link[1], link[2])]))
    if italics:
        tags.append(StyledTextTag(StyledTextTagType.ITALIC, False, italics))
    if bolds:
        tags.append(StyledTextTag(StyledTextTagType.BOLD, False, bolds))
    if unders:
        tags.append(StyledTextTag(StyledTextTagType.UNDERLINE, False,
                                  unders))
    if reds:
        tags.append(StyledTextTag(StyledTextTagType.HIGHLIGHT, '#FFFF00',
                                  reds))
    return StyledText(result, tags)
def linkst(text, url):
    """ Build a StyledText in which the whole of `text` links to `url` """
    whole_text = (0, len(text))
    link_tag = StyledTextTag(StyledTextTagType.LINK, url, [whole_text])
    return StyledText(text, [link_tag])
def boldst(text):
    """ Build a StyledText in which the whole of `text` is bold """
    whole_text = (0, len(text))
    bold_tag = StyledTextTag(StyledTextTagType.BOLD, True, [whole_text])
    return StyledText(text, [bold_tag])
def st(text):
    """ Wrap plain `text` in a StyledText carrying no tags """
    plain = StyledText(text)
    return plain