def handle_repositories(self):
    """Import every <repository> element of the collection.

    For each element a Repository object is created and its base fields
    are filled by self._extract_base(). A single <rname> / <type> child
    supplies the repository name / type; if either tag occurs more than
    once, a WARNING event is logged and the field is left untouched.
    Every <url> child that has a non-empty href becomes a Note attached
    to the repository. The object is then stored with
    self.save_and_link_handle().

    When all elements are processed, a summary event with the number of
    repositories and the elapsed time is logged.
    """
    repo_elements = self.collection.getElementsByTagName("repository")
    print("***** {} Repositories *****".format(len(repo_elements)))
    started = time.time()
    processed = 0

    # (child tag == attribute name, warning title when the tag is repeated)
    single_valued = (
        ('rname', "More than one rname in a repository"),
        ('type', "More than one type in a repository"),
    )

    for element in repo_elements:
        repo = Repository()
        # Extract handle, change and id
        self._extract_base(element, repo)

        for tag, duplicate_warning in single_valued:
            found = element.getElementsByTagName(tag)
            if len(found) > 1:
                self.blog.log_event({
                    'title': duplicate_warning,
                    'level': "WARNING",
                    'count': repo.id
                })
            elif len(found) == 1:
                # The text of the element is its first child node
                setattr(repo, tag, found[0].childNodes[0].data)

        # Web links are stored as Notes; links without href are dropped
        for url_element in element.getElementsByTagName('url'):
            link = Note()
            link.url = url_element.getAttribute("href")
            link.type = url_element.getAttribute("type")
            link.text = url_element.getAttribute("description")
            if link.url:
                repo.notes.append(link)

        self.save_and_link_handle(repo, batch_id=self.batch_id)
        processed += 1

    self.blog.log_event({
        'title': "Repositories",
        'count': processed,
        'elapsed': time.time() - started
    })
def handle_notes(self):
    """Import every <note> element of the collection.

    For each element a Note object is created: base fields come from
    self._extract_base(), the privacy flag from get_priv() and the type
    from the optional "type" attribute. When the element has exactly one
    <text> child, its content is passed through pick_url(), which returns
    the text and url values stored to the Note. The Note is stored with
    self.save_and_link_handle().

    Finally a summary event with the number of notes and the elapsed
    time is logged.
    """
    note_elements = self.collection.getElementsByTagName("note")
    print("***** {} Notes *****".format(len(note_elements)))
    started = time.time()
    processed = 0

    for element in note_elements:
        item = Note()
        # Extract handle, change and id
        self._extract_base(element, item)
        item.priv = get_priv(element)

        if element.hasAttribute("type"):
            item.type = element.getAttribute("type")

        text_elements = element.getElementsByTagName('text')
        if len(text_elements) == 1:
            item.text = text_elements[0].childNodes[0].data
            # Pick possible url
            item.text, item.url = pick_url(item.text)

        # TODO: On 17.10.2018 it was planned that causes of death would be
        #       converted directly to Note nodes with a suitable node type
        self.save_and_link_handle(item, batch_id=self.batch_id)
        processed += 1

    self.blog.log_event({
        'title': "Notes",
        'count': processed,
        'elapsed': time.time() - started
    })
def get_repositories(uniq_id=None):
    """Read Repository objects with their Sources and Notes for display.

    (Replaces read_repositories().)

    The records come from Repository.get_w_source(uniq_id) and are read
    by the keys 'uniq_id', 'rname', 'type', 'change', 'id', 'notes' and
    'sources'. A sample record:

        <Record uniq_id=138741
            rname='8. Suomenmaalaisen sotilasseurakunnan arkisto'
            type='Library' change='1541271759'
            handle='_e048d8ea78c7afc76c452682e16' id='R0215'
            sources=[[142172, '8. M metrikka 1908-1908 (I C:1)', 'Book']]
            webref=[]>

    NOTE(review): the sample shows three-item source rows, while five
    positions are read from each row below - confirm against the query.

    Returns:
        A tuple (titles, repositories): the list of column titles and
        the list of Repository objects, each carrying its notes and
        sources.
    """
    # Source attributes in the order they appear in a source row.
    # reporef_medium: Todo: Should use repository.medium
    source_fields = ('uniq_id', 'stitle', 'sauthor', 'spubinfo',
                     'reporef_medium')
    found = []

    for rec in Repository.get_w_source(uniq_id):
        repo = Repository()
        repo.uniq_id = rec['uniq_id']
        # Missing text values are shown as empty strings
        repo.rname = rec['rname'] or ''
        repo.change = rec['change']
        repo.type = rec['type'] or ''
        repo.id = rec['id'] or ''

        for note_node in rec['notes']:
            repo.notes.append(Note.from_node(note_node))

        for row in rec['sources']:
            src = Source()
            for position, field in enumerate(source_fields):
                setattr(src, field, row[position])
            repo.sources.append(src)

        found.append(repo)

    column_titles = ['change', 'uniq_id', 'id', 'rname', 'type',
                     'sources', 'notes']
    return (column_titles, found)
def save(self, tx, **kwargs):  # batch_id
    """Save the Person object and possibly the Names, Events and Citations.

    The Person node is created with Cypher_person_w_handle.create_to_batch
    and self.uniq_id is set from the first column of the returned
    record(s). After that the Names, web url Notes and Media references
    are saved, and the Person is linked to the Events, Notes and
    Citations named by the handle lists of this object.

    Args:
        tx: the database transaction; the queries are run with tx.run().
        **kwargs: must include batch_id, which is passed to the Person
            creation query.

    Raises:
        RuntimeError: if batch_id is not given.

    Failures in creating the Person node or in linking Events, Notes or
    Citations are logged with logger.error() and not re-raised. The
    Name, Note list and Media saves are not wrapped, so their exceptions
    propagate.

    @todo: Remove those referenced person names, which are not among
           new names (:Person) --> (:Name)
    """
    if 'batch_id' not in kwargs:
        raise RuntimeError(
            f"Person_gramps.save needs batch_id for {self.id}")
    batch_id = kwargs['batch_id']

    dbdriver = Neo4jWriteDriver(shareds.driver, tx)
    db = DBwriter(dbdriver)
    today = str(datetime.date.today())

    self.uuid = self.newUuid()
    # Save the Person node under UserProfile; all attributes are replaced
    p_attr = {}  # defined before the try so the error message can show it
    try:
        p_attr = {
            "uuid": self.uuid,
            "handle": self.handle,
            "change": self.change,
            "id": self.id,
            "priv": self.priv,
            "sex": self.sex,
            "confidence": self.confidence,
            "sortname": self.sortname
        }
        if self.dates:
            p_attr.update(self.dates.for_db())

        result = tx.run(Cypher_person_w_handle.create_to_batch,
                        batch_id=batch_id, p_attr=p_attr, date=today)
        ids = []
        for record in result:
            self.uniq_id = record[0]
            ids.append(self.uniq_id)
            if len(ids) > 1:
                print("iError updated multiple Persons {} - {}, attr={}".
                      format(self.id, ids, p_attr))
        if self.uniq_id is None:
            print("iWarning got no uniq_id for Person {}".format(p_attr))
    except Exception as err:
        logger.error(
            f"Person_gramps.save: {err} in Person {self.id} {p_attr}")

    # Save Name nodes under the Person node
    for name in self.names:
        name.save(tx, parent_id=self.uniq_id)

    # Save web urls as Note nodes connected under the Person
    if self.notes:
        Note.save_note_list(tx, self)

    # Connect to each Event loaded from Gramps.
    # The handle is pre-set so that the error message below never refers
    # to an unbound name, whatever step of the loop fails.
    e_handle = None
    try:
        for i, e_handle in enumerate(self.eventref_hlink):
            # eventref_role is a parallel list; a missing role raises
            # IndexError here and gets logged below
            tx.run(Cypher_person_w_handle.link_event,
                   p_handle=self.handle,
                   e_handle=e_handle,
                   role=self.eventref_role[i])
    except Exception as err:
        logger.error(
            f"Person_gramps.save: {err} in linking Event {self.handle} -> {e_handle}"
        )

    # Make relations to the Media nodes and it's Note and Citation references
    db.media_save_w_handles(self.uniq_id, self.media_refs)

    # The relations to the Family node will be created in Family.save(),
    # because the Family object is not yet created

    # Make relations to the Note nodes
    handle = None
    try:
        for handle in self.noteref_hlink:
            tx.run(Cypher_person_w_handle.link_note,
                   p_handle=self.handle, n_handle=handle)
    except Exception as err:
        logger.error(
            f"Person_gramps.save: {err} in linking Notes {self.handle} -> {handle}"
        )

    # Make relations to the Citation nodes
    handle = None
    try:
        for handle in self.citationref_hlink:
            tx.run(Cypher_person_w_handle.link_citation,
                   p_handle=self.handle, c_handle=handle)
    except Exception as err:
        logger.error(
            f"Person_gramps.save: {err} in linking Citations {self.handle} -> {handle}"
        )
def get_note_list(uniq_id=None):
    """Read Note objects from the database for display.

    Delegates to Note.get_note_list(uniq_id) and hands its two results
    on to the caller.

    Returns:
        A tuple (titles, notes) as produced by Note.get_note_list().
    """
    column_titles, note_objects = Note.get_note_list(uniq_id)
    return (column_titles, note_objects)
def get_person_data_by_id(pid):
    """ Get 5 data sets for one person.

        ---- older version ----

        ###Obsolete? still used in
        - /compare/uniq_id=311006,315556
        - /lista/person_data/<string:uniq_id>

        The given pid may be an uuid (str) or uniq_id (int).

        Returns a tuple (person, events, photos, citations, families):

        person: Person object with name data
            The indexes of referred objects are in variables
                event_ref[]        str uniq_id of an event,
                                   its role is in eventref_role[]
                media_ref[]        str uniq_id of a media object
                parentin_hlink[]   str uniq_id of the parents
                note_ref[]         str uniq_id of a note
                citation_ref[]     str uniq_id of a citation
        events[]    Event_combo with location name and id (?)
        photos      objects returned by Media.get_one()
        citations   Citation objects, each with its Source
        families    list of Family_combo objects with their members
    """
    p = Person_combo()
    # An int is taken as a database uniq_id, anything else as an uuid
    if isinstance(pid, int):
        p.uniq_id = pid
    else:
        p.uuid = pid
    # Get Person and her Name properties, also Note properties
    p.get_person_w_names()
    # Get reference (uniq_id) and role for Events
    # Get references to Media, Citation objects
    # Get Persons birth family reference and role
    p.get_hlinks_by_id()

    # Person_display(Person)
    events = []
    citations = []
    photos = []
    source_cnt = 0          # Number of distinct citations stored so far
    my_birth_date = ''

    # Events

    for i in range(len(p.event_ref)):
        # Store Event data
        e = Event_combo()  # Event_for_template()
        e.uniq_id = p.event_ref[i]
        e.role = p.eventref_role[i]
        # Read event with uniq_id's of related Place (Note, and Citation?)
        e.get_event_combo()  # Read data to e
        if e.type == "Birth":
            # Remembered for the family members section below
            my_birth_date = e.dates.estimate()

        for ref in e.place_ref:
            place = Place_combo.get_w_notes(ref)
            # place.read_w_notes()
            # Location / place name, type and reference.
            # If there are several references, the last one remains.
            e.place = place
            # #TODO: remove 3 lines
            # e.location = place.pname
            # e.locid = place.uniq_id
            # e.ltype = place.type

        if e.note_ref:  # A list of uniq_ids; prev. e.noteref_hlink != '':
            # Read the Note objects from db and store them as a member of Event
            e.notes = Note.get_notes(e.note_ref)

        events.append(e)

        # Citations

        for ref in e.citation_ref:  # citationref_hlink != '':
            c = Citation()
            c.uniq_id = ref
            # If there is already the same citation on the list of citations,
            # use that index (1-based).
            # Note: this loop reuses the name i of the event loop; the
            # event loop still gets its next value from range().
            citation_ind = -1
            for i in range(len(citations)):
                if citations[i].uniq_id == c.uniq_id:
                    citation_ind = i + 1
                    break
            if citation_ind > 0:
                # Citation found; Event_combo.source = number of the citation
                e.source = citation_ind
            else:
                # Store the new source to the list
                # source = number of the source in the same list
                source_cnt += 1
                e.source = source_cnt

                result = c.get_source_repo(c.uniq_id)
                for record in result:
                    # Citation data & list of Source, Repository and Note data
                    #
                    # <Record id=92127 date='2017-01-25' page='1785 Novembr 3. kaste'
                    #    confidence='3' notetext='http://www.sukuhistoria.fi/...'
                    #    sources=[
                    #        [91360,
                    #         'Lapinjärvi syntyneet 1773-1787 vol  es346',
                    #         'Book',
                    #         100272,
                    #         'Lapinjärven seurakunnan arkisto',
                    #         'Archive']
                    #    ]
                    #    url='http://...">
                    # NOTE(review): the sample above has six items per
                    # source row, but positions 0..7 are read below -
                    # confirm against the current query.
                    c.dateval = record['date']
                    c.page = record['page']
                    c.confidence = record['confidence']
                    if not record['notetext']:
                        # No note text: a page value starting with "http"
                        # is moved to the note text
                        if c.page[:4] == "http":
                            c.notetext = c.page
                            c.page = ''
                    else:
                        c.notetext = record['notetext']

                    for source in record['sources']:
                        s = Source()
                        s.uniq_id = source[0]
                        s.stitle = source[1]
                        s.sauthor = source[2]
                        s.spubinfo = source[3]
                        s.reporef_medium = source[4]  #Todo: Should use repository.medium

                        r = Repository()
                        r.uniq_id = source[5]
                        r.rname = source[6]
                        r.type = source[7]
                        s.repositories.append(r)

                        n = Note()
                        n.url = record['url']
                        s.notes.append(n)

                        # Only one Source is kept per Citation; the last
                        # row wins
                        c.source = s

                        # Debug trace of the Event > Citation > Source > Repository chain
                        print("Eve:{} {} > Cit:{} '{}' > Sour:{} '{}' '{}' '{}' > Repo:{} '{}' > Url: '{}'".\
                              format(e.uniq_id, e.id, c.uniq_id, c.page, s.uniq_id, s.stitle, s.sauthor, s.spubinfo, r.uniq_id, r.rname, n.url, ))

                # NOTE(review): appended once per newly seen citation, so
                # that the position in the list matches source_cnt -
                # confirm this nesting against the original layout.
                citations.append(c)

    for uniq_id in p.media_ref:
        o = Media.get_one(uniq_id)
        photos.append(o)

    # Families

    # Returning a list of Family objects
    # - which include a list of members (Person with 'role' attribute)
    #   - Person includes a list of Name objects

    families = {}   # Family_combo objects by family uniq_id
    fid = 0
    result = Person_combo.get_family_members(p.uniq_id)
    for record in result:
        # <Record family_id='F0018' f_uniq_id=217546 role='PARENT' parent_role='mother'
        #  m_id='I0038' uniq_id=217511 sex=2 birth_date=[0, 1892433, 1892433]
        #  names=[
        #    <Node id=217512 labels={'Name'} properties={'firstname': 'Brita Kristina',
        #        'type': 'Birth Name', 'suffix': 'Eriksdotter', 'surname': 'Berttunen',
        #        'order': 0}>,
        #    <Node id=217513 labels={'Name'} properties={'firstname': 'Brita Kristina',
        #        'type': 'Married Name', 'suffix': '', 'surname': 'Silius',
        #        'order': 1}>]>
        if fid != record["f_uniq_id"]:
            # The family changes; create it, if not seen before
            fid = record["f_uniq_id"]
            if not fid in families:
                families[fid] = Family_combo(fid)
                families[fid].id = record['family_id']
        member = Person_as_member()  # A kind of Person
        member.role = record["role"]
        member.id = record["m_id"]
        member.uniq_id = record["uniq_id"]
        if member.uniq_id == p.uniq_id:
            # What kind of family this is? I am a Child or Parent in family
            if member.role == "CHILD":
                families[fid].role = "CHILD"
            else:
                families[fid].role = "PARENT"
            if my_birth_date:
                member.birth_date = my_birth_date

        if record["sex"]:
            member.sex = record["sex"]
        if record["birth_date"]:
            # A birth date from the record replaces the value set above
            datetype, date1, date2 = record["birth_date"]
            if datetype != None:
                member.birth_date = DateRange(datetype, date1, date2).estimate()
        if record["names"]:
            for node in record["names"]:
                n = Name.from_node(node)
                member.names.append(n)

        if member.role == "CHILD":
            families[fid].children.append(member)
        elif member.role == "PARENT":
            parent_role = record["parent_role"]
            if parent_role == 'mother':
                families[fid].mother = member
            elif parent_role == 'father':
                families[fid].father = member
        # TODO: Remove these, obsolete
        elif member.role == "FATHER":
            families[fid].father = member
        elif member.role == "MOTHER":
            families[fid].mother = member

    family_list = list(families.values())

    # Find all referenced for the nodes found so far
    # NOTE(review): nodes is filled here but not read afterwards in this
    # function - confirm whether it is still needed.
    nodes = {p.uniq_id: p}
    for e in events:
        nodes[e.uniq_id] = e
    for e in photos:
        nodes[e.uniq_id] = e
    for e in citations:
        nodes[e.uniq_id] = e
    for e in family_list:
        nodes[e.uniq_id] = e

    return (p, events, photos, citations, family_list)
def handle_places(self):
    """Import every <placeobj> element of the collection.

    To create place hierarchy links, there must be a dictionary of Place
    handles and uniq_ids created so far. The link may use previous node
    or create a new one. That dictionary (place_keys) is created here
    and passed to self.save_and_link_handle() with each place.

    For each element a Place_gramps object is built from
    - the base fields set by self._extract_base() and the "type"
      attribute,
    - a single <ptitle> child,
    - <pname> children having a "value" attribute: each becomes a
      PlaceName with a running order number, language and date range;
      names with an empty value are discarded with a WARNING event,
    - the first <coord> child having both "lat" and "long"; further ones
      and values rejected by Point() cause a WARNING event,
    - <url> children with a non-empty href, stored as Notes,
    - <placeref> children, stored to surround_ref as
      {'hlink': ..., 'dates': ...} dictionaries,
    - <noteref> children and the media references returned by
      self._extract_mediaref().

    Finally a summary event with the number of places and the elapsed
    time is logged.
    """
    place_keys = {}  # place_keys[handle] = uniq_id

    places = self.collection.getElementsByTagName("placeobj")
    print("***** {} Places *****".format(len(places)))
    t0 = time.time()
    counter = 0

    for placeobj in places:
        pl = Place_gramps()
        # Extract handle, change and id
        self._extract_base(placeobj, pl)
        pl.type = placeobj.getAttribute("type")

        # List of upper places in hierarchy as {hlink, dates} dictionaries
        pl.surround_ref = []

        # Note. The ptitle is never saved to Place object!
        ptitles = placeobj.getElementsByTagName('ptitle')
        if len(ptitles) == 1:
            pl.ptitle = ptitles[0].childNodes[0].data
        elif len(ptitles) > 1:
            self.blog.log_event({
                'title': "More than one ptitle in a place",
                'level': "WARNING",
                'count': pl.id
            })

        place_order = 0
        for placeobj_pname in placeobj.getElementsByTagName('pname'):
            if placeobj_pname.hasAttribute("value"):
                placename = PlaceName()
                placename.order = place_order
                place_order += 1
                placename.name = placeobj_pname.getAttribute("value")
                if placename.name:
                    if pl.pname == '':
                        # First name is default pname for Place node
                        pl.pname = placename.name
                    placename.lang = placeobj_pname.getAttribute("lang")
                    pl.names.append(placename)
                else:
                    self.blog.log_event({
                        'title': "An empty place name discarded",
                        'level': "WARNING",
                        'count': f"{pl.id}({place_order})"
                    })
                    # Give the order number back for the next name
                    place_order -= 1

                # Kept inside the "value" branch so that placename is
                # always the object created for this very <pname>
                try:
                    # Returns Gramps_DateRange or None
                    placename.dates = self._extract_daterange(
                        placeobj_pname)
                    # TODO: val="1700-luvulla" ("in the 1700s") should be
                    #       converted to a Note
                except Exception:
                    # Best effort: an unreadable date range leaves the
                    # name undated. Exception is used instead of a bare
                    # except, so KeyboardInterrupt and SystemExit pass.
                    placename.dates = None

        for placeobj_coord in placeobj.getElementsByTagName('coord'):
            if placeobj_coord.hasAttribute("lat") \
               and placeobj_coord.hasAttribute("long"):
                lat = placeobj_coord.getAttribute("lat")
                lon = placeobj_coord.getAttribute("long")
                if pl.coord:
                    # Only the first valid coordinates are kept
                    self.blog.log_event({
                        'title': "More than one coordinates in a place",
                        'level': "WARNING",
                        'count': pl.id
                    })
                else:
                    try:
                        pl.coord = Point(lat, lon)
                    except Exception as e:
                        self.blog.log_event({
                            'title': "Invalid coordinates - {}".format(e),
                            'level': "WARNING",
                            'count': pl.id
                        })

        # Web links are stored as Notes; links without href are dropped
        for placeobj_url in placeobj.getElementsByTagName('url'):
            n = Note()
            n.priv = get_priv(placeobj_url)
            n.url = placeobj_url.getAttribute("href")
            n.type = placeobj_url.getAttribute("type")
            n.text = placeobj_url.getAttribute("description")
            if n.url:
                pl.notes.append(n)

        for placeobj_placeref in placeobj.getElementsByTagName('placeref'):
            # Traverse links to surrounding (upper) places
            hlink = placeobj_placeref.getAttribute("hlink")
            dates = self._extract_daterange(placeobj_placeref)
            # surround_ref elements example
            # {'hlink': '_ddd3...', 'dates': <Gramps_DateRange object>}
            pl.surround_ref.append({'hlink': hlink, 'dates': dates})

        for placeobj_noteref in placeobj.getElementsByTagName('noteref'):
            if placeobj_noteref.hasAttribute("hlink"):
                pl.noteref_hlink.append(
                    placeobj_noteref.getAttribute("hlink"))

        # Handle <objref>
        pl.media_refs = self._extract_mediaref(placeobj)
        if pl.media_refs:
            print(f'# saving Place {pl.id}: media_refs {pl.media_refs}')

        # Save Place, Place_names, Notes and connect to hierarchy
        self.save_and_link_handle(pl, batch_id=self.batch_id,
                                  place_keys=place_keys)
        # The place_keys has been updated
        counter += 1

    self.blog.log_event({
        'title': "Places",
        'count': counter,
        'elapsed': time.time() - t0
    })
def handle_people(self):
    """Import every <person> element of the collection.

    For each element a Person_gramps object is built from
    - the base fields set by self._extract_base(),
    - the first <gender> child,
    - <name> children: each becomes a Name with a running order number,
      optional "alt" and "type" attributes, first name, surname (with an
      optional prefix), suffix, title and the citation handles of the
      name,
    - <eventref> children: hlink and role are collected to the parallel
      lists eventref_hlink and eventref_role,
    - the media references returned by self._extract_mediaref(),
    - <url> children with a non-empty href, stored as Notes,
    - the hlinks of <parentin>, <noteref> and <citationref> children.

    Repeated single-valued items cause a WARNING event. An element with
    no content leaves the corresponding name field untouched.

    Each person is stored with self.save_and_link_handle() and the
    resulting uniq_id is appended to self.person_ids. Finally a summary
    event with the number of persons and the elapsed time is logged.
    """
    people = self.collection.getElementsByTagName("person")
    person_count = len(people)
    print("***** {} Persons *****".format(person_count))
    t0 = time.time()
    counter = 0

    # Get details of each person
    for person in people:
        name_order = 0
        p = Person_gramps()
        # Extract handle, change and id
        self._extract_base(person, p)

        for person_gender in person.getElementsByTagName('gender'):
            if p.sex:
                self.blog.log_event({
                    'title': "More than one sexes in a person",
                    'level': "WARNING",
                    'count': p.id
                })
                break
            p.sex = p.sex_from_str(person_gender.childNodes[0].data)

        for person_name in person.getElementsByTagName('name'):
            pname = Name()
            pname.order = name_order
            name_order += 1

            if person_name.hasAttribute("alt"):
                pname.alt = person_name.getAttribute("alt")
            if person_name.hasAttribute("type"):
                pname.type = person_name.getAttribute("type")

            for person_first in person_name.getElementsByTagName('first'):
                if pname.firstname:
                    self.blog.log_event({
                        'title': "More than one first name in a person",
                        'level': "WARNING",
                        'count': p.id
                    })
                    break
                if len(person_first.childNodes) == 1:
                    pname.firstname = person_first.childNodes[0].data
                elif len(person_first.childNodes) > 1:
                    self.blog.log_event({
                        'title': "More than one child node in a first name of a person",
                        'level': "WARNING",
                        'count': p.id
                    })

            surnames = person_name.getElementsByTagName('surname')
            if len(surnames) == 1:
                person_surname = surnames[0]
                if person_surname.hasAttribute("prefix"):
                    pname.prefix = person_surname.getAttribute("prefix")
                if len(person_surname.childNodes) == 1:
                    pname.surname = person_surname.childNodes[0].data
                elif len(person_surname.childNodes) > 1:
                    self.blog.log_event({
                        'title': "More than one child node in a surname of a person",
                        'level': "WARNING",
                        'count': p.id
                    })
            elif len(surnames) > 1:
                self.blog.log_event({
                    'title': "More than one surname in a person",
                    'level': "WARNING",
                    'count': p.id
                })

            suffixes = person_name.getElementsByTagName('suffix')
            if len(suffixes) == 1:
                # An element without content has no child nodes; it is
                # skipped like an empty first name or surname
                if suffixes[0].childNodes:
                    pname.suffix = suffixes[0].childNodes[0].data
            elif len(suffixes) > 1:
                self.blog.log_event({
                    'title': "More than one suffix in a person",
                    'level': "WARNING",
                    'count': p.id
                })

            titles = person_name.getElementsByTagName('title')
            if len(titles) == 1:
                if titles[0].childNodes:
                    pname.title = titles[0].childNodes[0].data
            elif len(titles) > 1:
                self.blog.log_event({
                    'title': "More than one title in a person",
                    'level': "WARNING",
                    'count': p.id
                })

            # Citations of this name; the node list is fetched only once
            for person_name_citationref in \
                    person_name.getElementsByTagName('citationref'):
                if person_name_citationref.hasAttribute("hlink"):
                    pname.citation_handles.append(
                        person_name_citationref.getAttribute("hlink"))

            p.names.append(pname)

        # TODO: Should p.eventref_hlink hold (hlink, role) pairs?
        #       As it is, the two lists could get out of step
        for person_eventref in person.getElementsByTagName('eventref'):
            if person_eventref.hasAttribute("hlink"):
                p.eventref_hlink.append(
                    person_eventref.getAttribute("hlink"))
            if person_eventref.hasAttribute("role"):
                p.eventref_role.append(
                    person_eventref.getAttribute("role"))

        # Handle <objref>
        p.media_refs = self._extract_mediaref(person)

        # Web links are stored as Notes; links without href are dropped
        for person_url in person.getElementsByTagName('url'):
            n = Note()
            n.priv = get_priv(person_url)
            n.url = person_url.getAttribute("href")
            n.type = person_url.getAttribute("type")
            n.text = person_url.getAttribute("description")
            if n.url:
                p.notes.append(n)

        for person_parentin in person.getElementsByTagName('parentin'):
            if person_parentin.hasAttribute("hlink"):
                p.parentin_hlink.append(
                    person_parentin.getAttribute("hlink"))

        for person_noteref in person.getElementsByTagName('noteref'):
            if person_noteref.hasAttribute("hlink"):
                p.noteref_hlink.append(
                    person_noteref.getAttribute("hlink"))

        # NOTE(review): getElementsByTagName also returns the
        # <citationref> elements nested in the <name> children - confirm
        # that listing them for the person as well is intended
        for person_citationref in person.getElementsByTagName(
                'citationref'):
            if person_citationref.hasAttribute("hlink"):
                p.citationref_hlink.append(
                    person_citationref.getAttribute("hlink"))

        self.save_and_link_handle(p, batch_id=self.batch_id)
        counter += 1
        # The refnames will be set for these persons
        self.person_ids.append(p.uniq_id)

    self.blog.log_event({
        'title': "Persons",
        'count': counter,
        'elapsed': time.time() - t0
    })