def migrate_feature_set(self):
    """Migrates every legacy FeatureSet into a new FeatureSet.

    The model refers to itself through its optional `feature_set`
    field. Rather than order the records cleverly, they are walked
    twice: the first pass creates each new record and notes its id
    in the 'feature_set' id map, and the second pass attaches each
    record to its parent.

    """
    self._id_map['feature_set'] = {}
    # First pass: create the records without any parent reference.
    for old_set in legacy.models.FeatureSet.objects.all():
        order = self._get_feature_set_order(old_set.n)
        name = self._massage_feature_set_name(
            migration_utils.regularise_text(old_set.name))
        n = migration_utils.regularise_text(old_set.n).strip('.')
        new_set = sculpture.models.FeatureSet(order=order, name=name, n=n)
        self._migrate_timestamps(old_set, new_set)
        new_set.save()
        self._id_map['feature_set'][old_set.id] = new_set.id
    # Second pass: every record now exists, so parents can be linked.
    for old_set in legacy.models.FeatureSet.objects.all():
        if old_set.feature_set is None:
            continue
        child = self._get_object(sculpture.models.FeatureSet,
                                 'feature_set', old_set.id)
        child.feature_set = self._get_object(
            sculpture.models.FeatureSet, 'feature_set',
            old_set.feature_set.id)
        self._migrate_timestamps(old_set, child)
        child.save()
def migrate_contributor(self):
    """Creates Contributors from legacy Authors, Photographers and
    the users recorded as the author of a legacy site.

    The 'contributor' id map is keyed by a prefixed legacy id:
    'a<id>' for an Author, 'p<id>' for a Photographer and 'u<id>'
    for the User behind a SculptureSite's author.

    """
    self._id_map['contributor'] = {}
    for legacy_author in legacy.models.Author.objects.all():
        contributor = self._migrate_contributor(legacy_author)
        legacy_id = 'a%d' % legacy_author.id
        self._id_map['contributor'][legacy_id] = contributor.id
    for legacy_photographer in legacy.models.Photographer.objects.all():
        # There is overlap between Authors and Photographers, so
        # reuse an existing Contributor record.
        try:
            contributor = sculpture.models.Contributor.objects.get(
                name=migration_utils.regularise_text(
                    legacy_photographer.name))
        except sculpture.models.Contributor.DoesNotExist:
            contributor = self._migrate_contributor(legacy_photographer)
        legacy_id = 'p%d' % legacy_photographer.id
        self._id_map['contributor'][legacy_id] = contributor.id
    for legacy_site in legacy.models.SculptureSite.objects.all():
        user = legacy_site.author.user
        try:
            contributor = sculpture.utils.get_profile(user).contributor
        except sculpture.models.Contributor.DoesNotExist:
            # Join only the name parts that are set, so that a user
            # with just a first or just a last name does not end up
            # with a stray leading or trailing space. Fall back to
            # the username when neither part is set.
            name_parts = [part for part in (user.first_name, user.last_name)
                          if part]
            name = ' '.join(name_parts) or user.username
            contributor = sculpture.models.Contributor(
                name=name, user_profile=sculpture.utils.get_profile(user),
                created=self._now, modified=self._now)
            contributor.save()
        legacy_id = 'u%d' % user.id
        self._id_map['contributor'][legacy_id] = contributor.id
def _get_publication(self, legacy_bibliography, author):
    """Returns the BibliographyPublication for `legacy_bibliography`,
    creating and saving it the first time it is asked for.

    Publications are remembered in the 'bibliography_publication' id
    map under a key built from the legacy title id and the legacy
    publication info.

    """
    # NOTE(review): this holds whatever `publication_info` is rather
    # than its id - confirm that is the intended cache key.
    try:
        info_key = legacy_bibliography.publication_info
    except AttributeError:
        info_key = ''
    legacy_id = '%s_%s' % (legacy_bibliography.title.id, info_key)
    try:
        return self._get_object(
            sculpture.models.BibliographyPublication,
            'bibliography_publication', legacy_id)
    except KeyError:
        # Not migrated yet; build it below.
        pass
    try:
        info_text = migration_utils.regularise_text(
            legacy_bibliography.publication_info.publication_info)
    except AttributeError:
        info_text = ''
    publication = sculpture.models.BibliographyPublication(
        author=author, title=legacy_bibliography.title.title,
        publication_info=info_text)
    self._migrate_timestamps(legacy_bibliography, publication)
    publication.save()
    self._id_map['bibliography_publication'][legacy_id] = publication.id
    return publication
def migrate_region_type(self):
    """Migrates every legacy RegionType, recording the new ids in the
    'region_type' id map."""
    self._id_map['region_type'] = {}
    for old_type in legacy.models.RegionType.objects.all():
        new_type = sculpture.models.RegionType(
            name=migration_utils.regularise_text(old_type.name))
        self._migrate_timestamps(old_type, new_type)
        new_type.save()
        self._id_map['region_type'][old_type.id] = new_type.id
def migrate_period(self):
    """Migrates every legacy DateType into a Period, recording the
    new ids in the 'period' id map."""
    self._id_map['period'] = {}
    for old_period in legacy.models.DateType.objects.all():
        new_period = sculpture.models.Period(
            name=migration_utils.regularise_text(old_period.name))
        self._migrate_timestamps(old_period, new_period)
        new_period.save()
        self._id_map['period'][old_period.id] = new_period.id
def migrate_dimension(self):
    """Migrates every legacy Dimension whose Feature was migrated,
    recording the new ids in the 'dimension' id map.

    A Dimension whose Feature is missing from the 'feature' id map
    is reported and skipped.

    """
    self._id_map['dimension'] = {}
    # QAZ: Restrict to those which are associated with Features
    # that are associated with a Site?
    for old_dimension in legacy.models.Dimension.objects.all():
        try:
            feature = self._get_object(sculpture.models.Feature, 'feature',
                                       old_dimension.feature.id)
        except KeyError:
            print('Skipping Dimension associated with a skipped Feature')
            continue
        new_dimension = sculpture.models.Dimension(
            feature=feature,
            dimension_type=migration_utils.regularise_text(
                old_dimension.type),
            value=migration_utils.regularise_text(old_dimension.value))
        self._migrate_timestamps(old_dimension, new_dimension)
        new_dimension.save()
        self._id_map['dimension'][old_dimension.id] = new_dimension.id
def _get_diocese_object(self, name, period): if not name: diocese = None else: name = self._massage_diocese_name( migration_utils.regularise_text(name), period) diocese = self._map['diocese'].get(name) if diocese is None: diocese = migration_utils.get_or_create_diocese(name) self._map['diocese'][name] = diocese return diocese
def _get_settlement_object(self, name): if not name: settlement = None else: name = migration_utils.regularise_text(name) name = self._massage_settlement_name(name) settlement = self._map['settlement'].get(name) if settlement is None: settlement = migration_utils.get_or_create_settlement(name) self._map['settlement'][name] = settlement return settlement
def _migrate_contributor(self, legacy_contributor):
    """Looks up the user profile of the User whose first and last
    names match the regularised name of `legacy_contributor`.

    The name is split on its last run of whitespace; a name with no
    whitespace is treated as a last name only. `user_profile` is
    None when no User, or more than one User, has that name.

    """
    # NOTE(review): the visible body stops after setting
    # `user_profile`, yet callers use the return value as a
    # Contributor - confirm the rest of this method is not missing.
    name = migration_utils.regularise_text(legacy_contributor.name)
    try:
        first_name, last_name = name.rsplit(None, 1)
    except ValueError:
        first_name, last_name = ('', name)
    try:
        user = User.objects.get(first_name=first_name, last_name=last_name)
        user_profile = sculpture.utils.get_profile(user)
    # The exceptions must be given as a tuple: the unparenthesised
    # "except A, B" form catches only A and binds it to B.
    except (User.DoesNotExist, User.MultipleObjectsReturned):
        user_profile = None
def _tidy_html(self, text):
    """Returns `text` with entity references removed or decoded and
    multiple spaces collapsed.

    An empty or None `text` yields the empty string. Otherwise the
    text is cleaned with lxml, re-serialised as UTF-8 and passed
    through migration_utils.regularise_text.

    """
    if not text:
        return ''
    # NOTE(review): the first argument here may be a non-breaking
    # space that looks identical to a plain space; if both are plain
    # spaces this line is a no-op - confirm against the original file.
    text = text.replace(' ', ' ')
    # Per lxml's Cleaner documentation, remove_tags drops the listed
    # tags themselves but keeps their content.
    cleaner = lxml.html.clean.Cleaner(remove_tags=('a', ))
    text = cleaner.clean_html(text)
    html = lxml.html.fragment_fromstring(text, create_parent='div')
    # Remove the added enclosing div: 5 characters for '<div>' at the
    # start and 6 for '</div>' at the end.
    text = lxml.html.tostring(html, encoding='utf-8')[5:-6]
    return migration_utils.regularise_text(text)
def migrate_settlement(self):
    """Migrates every legacy Settlement, recording the new ids in the
    'settlement' id map.

    Legacy settlements whose regularised, massaged names are equal
    share a single new Settlement record.

    """
    self._id_map['settlement'] = {}
    for old_settlement in legacy.models.Settlement.objects.all():
        name = self._massage_settlement_name(
            migration_utils.regularise_text(old_settlement.name))
        try:
            new_settlement = sculpture.models.Settlement.objects.get(
                name=name)
        except sculpture.models.Settlement.DoesNotExist:
            new_settlement = sculpture.models.Settlement(name=name)
            self._migrate_timestamps(old_settlement, new_settlement)
            new_settlement.save()
        self._id_map['settlement'][old_settlement.id] = new_settlement.id
def migrate_site_image(self, site, legacy_site_image):
    """Creates a SiteImage on `site` from `legacy_site_image`.

    Nothing is created when the image lookup returns no data for
    the legacy image's file name.

    """
    image_data = sculpture.management.image_lookup.get_image_data(
        legacy_site_image.image.name, False)
    if image_data is None:
        return
    caption = migration_utils.regularise_text(legacy_site_image.caption)
    image_data.update(caption=caption, site=site)
    new_image = sculpture.models.SiteImage(**image_data)
    self._migrate_timestamps(legacy_site_image, new_image)
    new_image.save()
def migrate_site(self):
    """Migrates every legacy SculptureSite into a new Site, together
    with its authors, bibliographies, dedications, dioceses, regions
    and site images.

    The new ids are recorded in the 'site' id map once all of a
    site's related records have been migrated.

    """
    # QAZ: Finish this: author, photographers, associated_sites...
    self._id_map['site'] = {}
    # Names of the related managers on a legacy site, each paired
    # with the method that migrates one of its items, in the order
    # in which they are to be migrated.
    related_migrations = (
        ('bibliographies', self.migrate_bibliography),
        ('dedications', self.migrate_site_dedication),
        ('dioceses', self.migrate_site_diocese),
        ('regions', self.migrate_site_region),
        ('site_images', self.migrate_site_image),
    )
    for old_site in legacy.models.SculptureSite.objects.all():
        country = self._get_object(sculpture.models.Country, 'country',
                                   old_site.country.id)
        try:
            settlement = self._get_object(
                sculpture.models.Settlement, 'settlement',
                old_site.settlement.id)
        except AttributeError:
            settlement = None
        status = self._get_object(sculpture.models.SiteStatus,
                                  'site_status', old_site.status.id)
        new_site = sculpture.models.Site(
            site_id=old_site.site_id,
            status=status,
            visit_date=self._tidy_html(old_site.visit_date),
            name=migration_utils.regularise_text(old_site.name),
            country=country,
            grid_reference=migration_utils.normalise_grid_reference(
                old_site.key),
            settlement=settlement,
            description=self._tidy_html(old_site.description),
            history=self._tidy_html(old_site.history),
            comments=self._tidy_html(old_site.comments))
        self._migrate_timestamps(old_site, new_site)
        new_site.save()
        for legacy_author in old_site.authors.all():
            new_site.authors.add(self._get_object(
                sculpture.models.Contributor, 'contributor',
                'a%d' % legacy_author.id))
        # Add the record author to the list of authors; that
        # distinction has been removed.
        new_site.authors.add(self._get_object(
            sculpture.models.Contributor, 'contributor',
            'u%d' % old_site.author.user.id))
        for manager_name, migrate_item in related_migrations:
            for legacy_item in getattr(old_site, manager_name).all():
                migrate_item(new_site, legacy_item)
        self._id_map['site'][old_site.id] = new_site.id
def migrate_diocese(self):
    """Migrates every legacy Diocese, recording the new ids in the
    'diocese' id map.

    The legacy name is regularised and then massaged with the Period
    mapped from the legacy diocese's date; legacy dioceses that end
    up with the same name share a single new Diocese record.

    """
    self._id_map['diocese'] = {}
    for old_diocese in legacy.models.Diocese.objects.all():
        period = self._get_object(sculpture.models.Period, 'period',
                                  old_diocese.date.id)
        name = self._massage_diocese_name(
            migration_utils.regularise_text(old_diocese.name), period)
        try:
            new_diocese = sculpture.models.Diocese.objects.get(name=name)
        except sculpture.models.Diocese.DoesNotExist:
            new_diocese = sculpture.models.Diocese(name=name)
            self._migrate_timestamps(old_diocese, new_diocese)
            new_diocese.save()
        self._id_map['diocese'][old_diocese.id] = new_diocese.id
def _migrate_glossary_term(self, item, parent):
    """Creates, saves and returns a GlossaryTerm for the element
    `item`, with `parent` as its broader term.

    The new id is recorded in the 'glossary' id map under the
    element's 'id' attribute.

    """
    # Setting the description here is necessary for validation,
    # but the text is overwritten later in the glossary migration
    # process.
    fields = {
        'name': migration_utils.regularise_text(item.findtext('term')),
        'broader_term': parent,
        'description': 'Lorem ipsum',
        'created': self._now,
        'modified': self._now,
    }
    new_term = sculpture.models.GlossaryTerm(**fields)
    new_term.save()
    self._id_map['glossary'][item.get('id')] = new_term.id
    return new_term
def migrate_region(self):
    """Migrates every legacy Region, recording the new ids in the
    'region' id map.

    Legacy regions with the same regularised name and the same
    region type share a single new Region record.

    """
    self._id_map['region'] = {}
    for old_region in legacy.models.Region.objects.all():
        region_type = self._get_object(sculpture.models.RegionType,
                                       'region_type', old_region.type.id)
        lookup = {
            'name': migration_utils.regularise_text(old_region.name),
            'region_type': region_type,
        }
        try:
            new_region = sculpture.models.Region.objects.get(**lookup)
        except sculpture.models.Region.DoesNotExist:
            new_region = sculpture.models.Region(**lookup)
            self._migrate_timestamps(old_region, new_region)
            new_region.save()
        self._id_map['region'][old_region.id] = new_region.id
def migrate_detail(self):
    """Migrates every legacy Detail whose Feature was migrated,
    recording the new ids in the 'detail' id map.

    A Detail whose Feature is missing from the 'feature' id map is
    reported and skipped.

    """
    self._id_map['detail'] = {}
    # QAZ: Restrict to those which are associated with Features
    # that are associated with a Site?
    for old_detail in legacy.models.Detail.objects.all():
        try:
            feature = self._get_object(sculpture.models.Feature, 'feature',
                                       old_detail.feature.id)
        except KeyError:
            print('Skipping Detail associated with a skipped Feature')
            continue
        new_detail = sculpture.models.Detail(
            feature=feature,
            title=migration_utils.regularise_text(old_detail.title),
            text=self._tidy_html(old_detail.text))
        self._migrate_timestamps(old_detail, new_detail)
        new_detail.save()
        self._id_map['detail'][old_detail.id] = new_detail.id
def _migrate_site(self, location_row, site_id):
    """Creates a draft Site from `location_row` and migrates its
    region, dedications, dioceses and images.

    When the row's 'author_county' value gives no country, the
    country is taken from the first two characters of `site_id`;
    if those are not a known abbreviation the site is reported and
    skipped.

    """
    author, country, county = self._get_author_country_county(
        location_row['author_county'])
    if country is None:
        country_names = {
            'ed': 'England',
            'id': 'Republic of Ireland',
            'ni': 'Northern Ireland',
            'sd': 'Scotland',
            'ws': 'Wales',
        }
        country_name = country_names.get(site_id[:2])
        if country_name is None:
            print('Skipping site "%s" due to missing country' % site_id)
            return
        country = self._get_country_object(country_name)
    settlement = self._get_settlement_object(
        location_row['type_of_building'])
    new_site = sculpture.models.Site(
        site_id=site_id,
        status=self._draft_status,
        visit_date='<p>%s</p>' % location_row['date_site_visit'],
        name=migration_utils.regularise_text(location_row['location']),
        country=country,
        settlement=settlement)
    new_site.save()
    if author:
        new_site.authors.add(author)
    self._migrate_region(new_site, county)
    # Dedications and dioceses each come in a modern and a medieval
    # form, migrated against the matching period.
    self._migrate_dedication(
        new_site, location_row['dedication_modern'], self._period_now)
    self._migrate_dedication(
        new_site, location_row['dedication_medieval'],
        self._period_medieval)
    self._migrate_diocese(
        new_site, location_row['diocese_modern'], self._period_now)
    self._migrate_diocese(
        new_site, location_row['diocese_medieval'], self._period_medieval)
    self._migrate_images(new_site, location_row['id'], author)
def migrate_feature(self):
    """Migrates every legacy Feature that has a Site, along with its
    feature images, recording the new ids in the 'feature' id map.

    A Feature for which looking up the Site raises AttributeError
    is reported and skipped.

    """
    self._id_map['feature'] = {}
    # QAZ: Restrict to those features associated with a Site?
    for old_feature in legacy.models.Feature.objects.all():
        feature_set = self._get_object(
            sculpture.models.FeatureSet, 'feature_set',
            old_feature.feature_set.id)
        try:
            site = self._get_object(sculpture.models.Site, 'site',
                                    old_feature.site.id)
        except AttributeError:
            print('Skipping Feature with no associated Site')
            continue
        new_feature = sculpture.models.Feature(
            site=site,
            feature_set=feature_set,
            name=migration_utils.regularise_text(old_feature.name),
            description=self._tidy_html(old_feature.description))
        self._migrate_timestamps(old_feature, new_feature)
        new_feature.save()
        self._id_map['feature'][old_feature.id] = new_feature.id
        for legacy_image in old_feature.feature_images.all():
            self.migrate_feature_image(new_feature, legacy_image)
def _split_dedication(self, text):
    """Splits the text of a dedication into one or more actual
    dedications, each consisting of a tuple of name, date, and
    certainty.

    This method is to deal with names that are problematic with
    regards to date, certainty or multiplicity of names. The
    method _massage_dedication is also used subsequently to sort
    out typographic problems within a single name.

    `text` is regularised first. Returns a list of (name, date,
    certain) tuples, which is empty when the regularised text is
    empty. The text is resolved, in order, by: an exact match in
    the table of known problem texts; one of the regular
    expressions for paired saints; and finally by splitting on
    ', ' and treating trailing digits in each part as its date.

    """
    def undated(*names):
        # Certain dedications with no date.
        return [(name, '', True) for name in names]

    def dated(date, *names):
        # Certain dedications that all share `date`.
        return [(name, date, True) for name in names]

    def doubtful(name, date=''):
        # A single dedication marked as uncertain.
        return [(name, date, False)]

    text = migration_utils.regularise_text(text)
    if not text:
        return []

    # Texts that need individual treatment. No key here is matched
    # by the regular expressions below with a different result, so
    # consulting the table first is safe.
    special_cases = {
        'All Saints and St Margaret':
            undated('All Saints', 'St Margaret'),
        'All Saints (1419, Borthwick)':
            dated('1419, Borthwick', 'All Saints'),
        'All Saints (1440, Prob. Reg.)':
            dated('1440, Prob. Reg.', 'All Saints'),
        'All Saints (1508, Prob Reg. 8, f.14)':
            dated('1508, Prob Reg. 8, f.14', 'All Saints'),
        'All Saints (1550, recorded at Borthwick)':
            dated('1550, recorded at Borthwick', 'All Saints'),
        'Holy and Undivided Trinity and St Etheldreda':
            undated('Holy and Undivided Trinity', 'St Etheldreda'),
        'Holy Trinity and St Mary':
            undated('Holy Trinity', 'St Mary'),
        'Holy Trinity and St Oswald':
            undated('Holy Trinity', 'St Oswald'),
        'Our Lady 16thc.':
            dated('16thc.', 'Our Lady'),
        'Our Lady and the Holy Trinity':
            undated('Our Lady', 'Holy Trinity'),
        'Saint Mary and St John the Evangelist':
            undated('St Mary', 'St John the Evangelist'),
        "Saints' Church (?)":
            doubtful("Saints' Church"),
        # The third name was previously misspelt as 'St Sanna'.
        'SS Afran, Ieuan and Sannan':
            undated('St Afran', 'St Ieuan', 'St Sannan'),
        'SS Helen and John the Baptist':
            undated('St Helen', 'St John the Baptist'),
        'SS John the Baptist and Alkmund':
            undated('St John the Baptist', 'St Alkmund'),
        'SS. Peter and Paul':
            undated('St Peter', 'St Paul'),
        'St Andrew (1529, Borthwick Institute)':
            dated('1529, Borthwick Institute', 'St Andrew'),
        "St Andrew's (from c.1220)":
            dated('from c.1220', 'St Andrew'),
        'St Andrew c. 1220':
            dated('c.1220', 'St Andrew'),
        'St Andrew c.1588':
            dated('c.1588', 'St Andrew'),
        'St Anne from 1538':
            dated('from 1538', 'St Anne'),
        'St Bartholomew until 1991':
            dated('until 1991', 'St Bartholomew'),
        'St Benedict and Holy Cross':
            undated('St Benedict', 'Holy Cross'),
        'St Bilo (or St Milburg, abbess of Wenlock, 7thc.)':
            undated('St Bilo (or St Milburg, abbess of Wenlock, 7thc.)'),
        'St Blathmac (?)':
            doubtful('St Blathmac'),
        'St Brecan (?)':
            doubtful('St Brecan'),
        'St Brigid (?)':
            doubtful('St Brigid'),
        'St George and All Saints':
            undated('St George', 'All Saints'),
        'St Giles c.1120':
            dated('c.1120', 'St Giles'),
        'St Helen (1390, Prob. Reg.)':
            dated('1390, Prob. Reg.', 'St Helen'),
        'St James 13thc.':
            dated('13thc.', 'St James'),
        'St James c.1200':
            dated('c.1200', 'St James'),
        'St John c.1200, 1517':
            dated('c.1200, 1517', 'St John'),
        'St John in reign of Wm. Rufus; St John the Baptist in 1551':
            (dated('in reign of Wm. Rufus', 'St John') +
             dated('1551', 'St John the Baptist')),
        'St John(reign of Wm. Rufus), St John the Baptist':
            (dated('reign of Wm. Rufus', 'St John') +
             undated('St John the Baptist')),
        'St John the Baptist (in 1551, Borthwick Institute)':
            dated('in 1551, Borthwick Institute', 'St John the Baptist'),
        'St John the Baptist?':
            doubtful('St John the Baptist'),
        'St John the Baptist and St Alkmund':
            undated('St John the Baptist', 'St Alkmund'),
        'St John the Evangelist (c.1150, SS James and John (c.1150), St John (1523)':
            (dated('c.1150', 'St John the Evangelist', 'St James',
                   'St John') +
             dated('1523', 'St John')),
        'St Julian late 12thc.':
            dated('late 12thc.', 'St Julian'),
        'St Laurence (c.1190)':
            dated('c.1190', 'St Laurence'),
        'St Lawrence,':
            undated('St Lawrence'),
        'St Leonard 1341 (unconfirmed)':
            dated('1341 (unconfirmed)', 'St Leonard'),
        'St Margaret (from)':
            undated('St Margaret'),
        'St Margaret 13thc.':
            dated('13thc.', 'St Margaret'),
        'St Margaret1514':
            dated('1514', 'St Margaret'),
        'St Margaret? 1520':
            doubtful('St Margaret', '1520'),
        'St Mary (1431, Prob. Reg.)':
            dated('1431, Prob. Reg.', 'St Mary'),
        'St Mary, originally St Cwrda':
            undated('St Mary') + dated('originally', 'St Cwrda'),
        'St Mary, latterly, St Margaret':
            undated('St Mary', 'St Margaret'),
        'St Mary, St Benedict and Holy Cross':
            undated('St Mary', 'St Benedict', 'Holy Cross'),
        'St Mary?':
            doubtful('St Mary'),
        'St Mary1224':
            dated('1224', 'St Mary'),
        'St Mary 13thc.':
            dated('13thc.', 'St Mary'),
        'St Mary and All Saints':
            undated('St Mary', 'All Saints'),
        'St Mary and All Saints 1763':
            dated('1763', 'St Mary', 'All Saints'),
        'St Mary and St Cuthbert 1187':
            dated('1187', 'St Mary', 'St Cuthbert'),
        'St Mary and St Thomas of Canterbury':
            undated('St Mary', 'St Thomas of Canterbury'),
        'St Mary and the Holy Cross':
            undated('St Mary', 'Holy Cross'),
        'St Mary before 1216':
            dated('before 1216', 'St Mary'),
        'St Mary Immaculate and St Joseph':
            undated('St Mary Immaculate', 'St Joseph'),
        'St Mary Magdalene and St Denys 1764':
            dated('1764', 'St Mary Magdalene', 'St Denys'),
        'St Mary the Less (Little St Mary)':
            undated('St Mary the Less', 'Little St Mary'),
        'St Mary the Virgin and All Saints':
            undated('St Mary the Virgin', 'All Saints'),
        'St Mary the Virgin mid 12thc.':
            dated('mid 12thc.', 'St Mary the Virgin'),
        'St Mary with St John':
            undated('St Mary', 'St John'),
        'St Mary (or St Andrew) 1086':
            dated('1086', 'St Mary', 'St Andrew'),
        'St Matthew1230':
            dated('1230', 'St Matthew'),
        'St Michael and All Angels 18thc.':
            dated('18thc.', 'St Michael and All Angels'),
        'St. Mchael and All Angels':
            undated('St Michael', 'All Angels'),
        'St Michael & All Angels':
            undated('St Michael', 'All Angels'),
        'St Michael and All Angels':
            undated('St Michael', 'All Angels'),
        'St Michael and All Saints':
            undated('St Michael', 'All Saints'),
        'St Mogua (?)':
            doubtful('St Mogua'),
        'St Monacella/St Melangell':
            undated('St Monacella', 'St Melangell'),
        'St Nicholas; St Thomas a Becket':
            undated('St Nicholas', 'St Thomas a Becket'),
        'St Nicholas (before1099)':
            dated('before 1099', 'St Nicholas'),
        'St Nicholas and St Peter ad Vincula':
            undated('St Nicholas', 'St Peter ad Vincula'),
        'St Nicholas (and St Peter ad Vincula)':
            undated('St Nicholas', 'St Peter ad Vincula'),
        'St Nicholas late 11thc.':
            dated('late 11thc.', 'St Nicholas'),
        'St Ninian, St Peter and St Paul':
            undated('St Ninian', 'St Peter', 'St Paul'),
        'St Patrick (?)':
            doubtful('St Patrick'),
        'St Peter?':
            doubtful('St Peter'),
        'St Peter 16thc.':
            dated('16thc.', 'St Peter'),
        'St Peter and All Saints':
            undated('St Peter', 'All Saints'),
        'St Peter and Paul':
            undated('St Peter', 'St Paul'),
        'St Peter and St John the Baptist':
            undated('St Peter', 'St John the Baptist'),
        'St Peter and St Paul 1556 and 1710':
            dated('1556 and 1710', 'St Peter', 'St Paul'),
        'St Peter c.1200; 1408':
            dated('c.1200; 1408', 'St Peter'),
        'St Peter c.1300':
            dated('c.1300', 'St Peter'),
        'St Peter (1474), SS Peter & Paul (1510)':
            (dated('1474', 'St Peter') +
             dated('1510', 'St Peter', 'St Paul')),
        'St Peter (1346, Prob. Reg.)':
            dated('1346, Prob. Reg.', 'St Peter'),
        'St Saviour (1510-13), Holy Trinity (1513-17)':
            (dated('1510-13', 'St Saviour') +
             dated('1513-17', 'Holy Trinity')),
        'St Trunio and SS Peter and Paul':
            undated('St Trunio', 'St Peter', 'St Paul'),
        '[? St Mary, originally St Cwrda]':
            doubtful('St Mary') + doubtful('St Cwrda', 'originally'),
        "The 'small church'":
            undated('The small church'),
        'unconfirmed':
            undated('not confirmed'),
    }
    if text in special_cases:
        return special_cases[text]

    # Two single-word saints joined by and/&/with, written either as
    # "SS A and B" or as "St A and St B".
    paired_saint_patterns = (
        r'^SS ([^ ]+) (and|&|with) ([^ ]+)$',
        r'^St\.? ([^ ]+) (and|&|with) St\.? ([^ ]+)$',
    )
    for pattern in paired_saint_patterns:
        match = re.search(pattern, text)
        if match:
            return undated('St ' + match.group(1), 'St ' + match.group(3))

    # St Peter and St Paul sharing one, optionally bracketed, year.
    match = re.search(r'^St Peter and St Paul \(?(\d+)\)?$', text)
    if match:
        return dated(match.group(1), 'St Peter', 'St Paul')

    # General case: comma-separated names, each optionally followed
    # by a run of digits (possibly bracketed) taken as its date.
    data = []
    for part in text.split(', '):
        match = re.search(r'^([^\d]*) \(?(\d+)\)?$', part)
        if match:
            data.append((match.group(1), match.group(2), True))
        else:
            data.append((part, '', True))
    return data