def add_notes():
    """Attach correspondence footnotes to entries in ``parallels_index``.

    Reads the 'correspondence' table and, for every row that carries a
    footnote, finds the parallel-group objects registered for the row's
    sutta uid whose group contains the other sutta uid, then appends a
    note dict ({'uids': [uid, other_uid], 'note': footnote}) to that
    object's 'notes' list.

    Raises:
        ValueError: if a footnote could not be attached to any group.
    """
    from sc.csv_loader import table_reader

    notes = [e for e in table_reader('correspondence') if e.footnote]
    for entry in notes:
        uid = entry.sutta_uid
        other_uid = entry.other_sutta_uid
        note = entry.footnote
        count = 0
        # parallels_index maps a sutta uid to the parallel-group objects
        # it participates in (module-level, built elsewhere in this module).
        for obj in parallels_index[uid]:
            group = obj['group']
            if other_uid in group:
                if 'notes' not in obj:
                    obj['notes'] = []
                obj['notes'].append({
                    'uids': [uid, other_uid],
                    'note': note
                })
                count += 1
        if count == 0:
            # Removed a debugging `globals().update(locals())` that dumped
            # all locals into the module namespace before raising.
            raise ValueError('Group could not be applied anywhere!')
        if count > 1:
            print('Group applied in multiple places!')
def build_parallels_data(self):
    """Assemble full, indirect and partial parallel mappings.

    Operates purely on uids, as (other_uid, footnote) tuples read from
    the 'correspondence' table. Returns a dict whose 'fulls',
    'indirects' and 'partials' entries are items views mapping a sutta
    uid to a set of (other_uid, footnote) tuples.
    """
    fulls = defaultdict(set)
    partials = defaultdict(set)
    indirects = defaultdict(set)

    # Register every correspondence row symmetrically in whichever
    # mapping (partial vs full) it belongs to.
    for row in table_reader('correspondence'):
        target = partials if row.partial else fulls
        target[row.sutta_uid].add((row.other_sutta_uid, row.footnote))
        target[row.other_sutta_uid].add((row.sutta_uid, row.footnote))

    # Derive indirect full parallels: everything a direct full parallel
    # is itself fully parallel to.
    for uid, pairs in fulls.items():
        for other_uid, _footnote in pairs:
            if other_uid in fulls:
                indirects[uid].update(fulls[other_uid])

    # Drop self-references from each indirect set.
    # NOTE(review): the original comment said "Remove self and fulls",
    # but direct fulls were never removed here — behaviour preserved.
    for uid, pairs in indirects.items():
        indirects[uid] = pairs - {pair for pair in pairs if pair[0] == uid}

    return {
        'fulls': fulls.items(),
        'indirects': indirects.items(),
        'partials': partials.items(),
    }
def add_notes():
    """Attach correspondence footnotes to the parallel groups they
    describe, warning when a footnote matches more than one group and
    failing when it matches none."""
    from sc.csv_loader import table_reader

    annotated = [row for row in table_reader('correspondence') if row.footnote]
    for entry in annotated:
        uid, other_uid = entry.sutta_uid, entry.other_sutta_uid
        note = entry.footnote
        partial = entry.partial
        hits = 0
        for obj in parallels_index[uid]:
            if other_uid not in obj['group']:
                continue
            obj.setdefault('notes', []).append({'uids': [uid, other_uid], 'note': note})
            hits += 1
        if hits == 0:
            # Debugging aid: expose the locals at module level before failing.
            globals().update(locals())
            raise ValueError('Group could not be applied anywhere!')
        if hits > 1:
            print('Group applied in multiple places!')
def load(self):
    """Populate self.mapping from the 'pali_concord' table.

    Keys are (msbook, msnum, edition) tuples with msnum coerced to int;
    values are (book, page) tuples.
    """
    from sc.csv_loader import table_reader

    concordance = {}
    for msbook, msnum, edition, book, page in table_reader('pali_concord'):
        concordance[(msbook, int(msnum), edition)] = (book, page)
    self.mapping = concordance
def build_sc_uid_expansion(env):
    """Write the uid-expansion table as a JavaScript data file.

    Creates js/sc_uid_expansion_data.js under env.directory, assigning
    the uid -> [acronym, name] mapping to sc.util.expand_uid_data.
    Returns the filename relative to env.directory.
    """
    from sc.csv_loader import table_reader
    import json, os.path

    filename = 'js/sc_uid_expansion_data.js'
    target = os.path.join(env.directory, filename)
    with open(target, 'w', encoding='UTF8') as outfile:
        expansions = {}
        for uid, acro, name in table_reader('uid_expansion'):
            expansions[uid] = [acro, name]
        outfile.write('sc.util.expand_uid_data = {}'.format(
            json.dumps(expansions, ensure_ascii=False)))
    return filename
def build_sc_uid_expansion(env):
    """Generate the JS file exposing uid -> [acronym, name] expansions.

    The file is written to <env.directory>/js/sc_uid_expansion_data.js;
    the path relative to env.directory is returned.
    """
    from sc.csv_loader import table_reader
    import json, os.path

    filename = 'js/sc_uid_expansion_data.js'
    with open(os.path.join(env.directory, filename), 'w', encoding='UTF8') as outfile:
        table = {row_uid: [row_acro, row_name]
                 for row_uid, row_acro, row_name in table_reader('uid_expansion')}
        serialized = json.dumps(table, ensure_ascii=False)
        outfile.write('sc.util.expand_uid_data = {}'.format(serialized))
    return filename
def build_grouped_suttas(self):
    """Build GroupedSutta objects from the 'vinaya_rules' table.

    Each rule is registered in self.suttas (keyed by uid) and appended
    to both its subdivision's sutta list and that subdivision's first
    vagga's sutta list.
    """
    # Removed an unused `vinaya_rules = {}` local and the unused
    # enumerate() index from the original.
    for row in table_reader('vinaya_rules'):
        uid = row.uid
        rule = GroupedSutta(
            uid=uid,
            volpage=row.volpage_info,
            imm=self,
        )
        # rule.subdivision is resolved by GroupedSutta itself
        # (presumably from the uid — not visible here).
        subdivision = rule.subdivision
        subdivision.suttas.append(rule)
        subdivision.vaggas[0].suttas.append(rule)
        self.suttas[uid] = rule
import regex
from sc import csv_loader

# Load uid to acro map.
# Module-level lookup tables mapping a uid component (e.g. a division
# abbreviation) to its acronym / full name, loaded once at import time.
_uid_to_acro_map = {}
_uid_to_name_map = {}
for row in csv_loader.table_reader('uid_expansion'):
    _uid_to_acro_map[row.uid] = row.acro
    _uid_to_name_map[row.uid] = row.name


def _expand_uid(uid, mapping):
    """Expand a structured uid into a human-readable string.

    Splits *uid* into alphabetic and numeric components, replaces each
    component via *mapping* (falling back to its upper-case form when
    the map has no entry), joins the components with spaces, and turns
    the hyphen of numeric ranges (e.g. 1-10) into an en-dash.
    """
    components = regex.findall(r'\p{alpha}+|\d+(?:\.\d+)?(?:-\d+)?', uid)
    out = ' '.join(mapping.get(c) or c.upper() for c in components)
    # En-dash only between digits, so hyphens elsewhere are untouched.
    out = regex.sub(r'(?<=\d+)-(?=\d+)', r'–', out)
    return out


def uid_to_acro(uid):
    """Return the acronym form of *uid* (via the uid_expansion table)."""
    return _expand_uid(uid, _uid_to_acro_map)


def uid_to_name(uid):
    """Return the full-name form of *uid* (via the uid_expansion table)."""
    return _expand_uid(uid, _uid_to_name_map)


languages = csv_loader.load_table('language')
def build(self):
    """Build the SuttaCentral In Memory Model.

    This starts from the highest level (i.e. collection) and works to
    the lowest level (i.e. parallels - the relationship between
    suttas). Since it is fully navigable both up and down, some
    elements can't be populated initially; this means that suttas
    insert themselves into the subdivision where they belong.

    Some tables are indexed as dicts, with the key being the uid.
    These include: collection, division, subdivision, sutta, language.

    When classes are contained within a class, for example suttas in a
    subdivision, this is always represented by a list. That list will
    be sorted appropriately and can be directly outputted, generally
    without any need for filtering or sorting. When an attribute is a
    list or dict, the name always ends in an 's', for example:
    imm.suttas['sn1.1'].subdivision.division.subdivisions[0].suttas[0]

    Some things, such as parallels, are not indexed at all, and are
    only accessible as attributes of the relevant suttas.

    The imm also examines the file system. The fully qualified path to
    a text can be acquired using: imm.text_paths[lang][uid]
    """
    # Build Pitakas
    self.pitakas = OrderedDict()
    for row in table_reader('pitaka'):
        self.pitakas[row.uid] = Pitaka(uid=row.uid, name=row.name, always_full=row.always_full)

    # Build Sects
    self.sects = OrderedDict()
    for row in table_reader('sect'):
        self.sects[row.uid] = Sect(uid=row.uid, name=row.name)

    # Build Languages (indexed by id)
    self.languages = OrderedDict()
    for row in table_reader('language'):
        self.languages[row.uid] = Language(
            uid=row.uid,
            name=row.name,
            iso_code=row.iso_code,
            isroot=row.isroot,
            priority=int(row.priority),
            search_priority=float(row.search_priority),
            collections=[],
        )

    # Note that one isocode can map to multiple languages
    # for example zh modern/ancient
    self.isocode_to_language = {}
    for language in self.languages.values():
        if language.iso_code not in self.isocode_to_language:
            self.isocode_to_language[language.iso_code] = []
        self.isocode_to_language[language.iso_code].append(language)

    # From external_text table: references to texts hosted elsewhere,
    # grouped by the sutta uid they belong to.
    text_refs = defaultdict(list)
    for row in table_reader('external_text'):
        text_refs[row.sutta_uid].append(
            TextRef(lang=self.languages[row.language], name=None,
                    abstract=row.abstract, url=row.url, priority=row.priority))

    self._external_text_refs = text_refs.copy()

    collections = []
    for i, row in enumerate(table_reader('collection')):
        if row.sect_uid:
            sect = self.sects[row.sect_uid]
        else:
            sect = None
        collection = Collection(
            uid=row.uid,
            name=row.name,
            abbrev_name=row.abbrev_name,
            lang=self.languages[row.language],
            sect=sect,
            pitaka=self.pitakas[row.pitaka_uid],
            menu_seq=i,  # table order defines menu order
            divisions=[]  # Populate later
        )
        collections.append(collection)

    # Sort collections by menu_seq
    collections.sort(key=Collection.sort_key)
    self.collections = OrderedDict()
    for collection in collections:
        self.collections[collection.uid] = collection
        self.languages[collection.lang.uid].collections.append(collection)

    # Build divisions (indexed by uid)
    self.divisions = OrderedDict()
    for i, row in enumerate(table_reader('division')):
        collection = self.collections[row.collection_uid]
        text_ref = self.get_text_ref(uid=row.uid, lang_uid=collection.lang.uid);
        division = Division(
            uid=row.uid,
            name=row.name,
            alt_name=row.alt_name,
            text_ref=text_ref,
            acronym=row.acronym or uid_to_acro(row.uid),  # derive if absent
            subdiv_ind=row.subdiv_ind,
            menu_seq=i,
            menu_gwn_ind=bool(row.menu_gwn_ind),
            collection=collection,
            subdivisions=[],  # Populate later
        )
        self.divisions[row.uid] = division
        # Populate collections
        collection.divisions.append(division)

    # Sort divisions within collections by menu_seq
    for collection in self.collections.values():
        collection.divisions.sort(key=Division.sort_key)

    # Build subdivisions (indexed by uid)
    self.subdivisions = OrderedDict()
    self.nosubs = set()
    for i, row in enumerate(table_reader('subdivision')):
        subdivision = Subdivision(
            uid=row.uid,
            acronym=row.acronym,
            division=self.divisions[row.division_uid],
            name=row.name,
            vagga_numbering_ind=row.vagga_numbering_ind,
            order=i,
            vaggas=[],  # Populate later
            suttas=[]  # Populate later
        )
        self.subdivisions[row.uid] = subdivision
        # A '-nosub' suffix marks a division with no real subdivisions;
        # remember the bare division uid.
        if row.uid.endswith('-nosub'):
            self.nosubs.add(row.uid[:-6])
        # populate divisions.subdivisions
        self.divisions[row.division_uid].subdivisions.append(subdivision)

    # Give every division without subdivisions a synthetic one sharing
    # the division's uid; order=9000 sorts it after real subdivisions.
    for division in self.divisions.values():
        if not division.subdivisions:
            subdivision = Subdivision(
                uid=division.uid,
                acronym=None,
                division=division,
                name=None,
                vagga_numbering_ind=False,
                order=9000,
                vaggas=[],
                suttas=[])
            division.subdivisions.append(subdivision)
            self.subdivisions[division.uid] = subdivision

    # Build vaggas (indexed by (subdivision_uid, number))
    self.vaggas = OrderedDict()
    for row in table_reader('vagga'):
        vagga = Vagga(
            subdivision=self.subdivisions[row.subdivision_uid],
            number=row.number,
            name=row.name,
            suttas=[],  # Populate later
        )
        self.vaggas[(row.subdivision_uid, row.number)] = vagga
        # Populate subdivision.vaggas
        vagga.subdivision.vaggas.append(vagga)

    # Every subdivision gets at least one (anonymous) vagga.
    for subdivision in self.subdivisions.values():
        if not subdivision.vaggas:
            subdivision.vaggas.append(Vagga(
                subdivision=subdivision, number=0, name=None, suttas=[]))

    # Load biblio entries (Not into an instance variable)
    biblios = {}
    for row in table_reader('biblio'):
        biblios[row.uid] = BiblioEntry(
            uid=row.uid,
            name=row.name,
            text=row.text)

    # Build suttas (indexed by uid)
    suttas = []
    for row in table_reader('sutta'):
        uid = row.uid
        # '//' separates a primary value from alternate volpage/acronym info.
        volpage = row.volpage.split('//')
        acro = row.acronym.split('//')
        if not acro[0]:
            acro[0] = uid_to_acro(uid)
        lang = self.languages[row.language]
        subdivision = self.subdivisions[row.subdivision_uid]
        if row.vagga_number:
            vagga_number = int(row.vagga_number)
            vagga = subdivision.vaggas[vagga_number - 1]
        else:
            vagga_number = 0
            vagga = subdivision.vaggas[0]
        # (?r) = reverse search: take the LAST run of digits in the uid;
        # suttas without any digits sort to the end via 9999.
        m = regex.search(r'(?r)\d+', row.uid)
        if m:
            number = int(m[0])
        else:
            number = 9999
        biblio_entry = None
        if row.biblio_uid:
            biblio_entry = biblios.get(row.biblio_uid)
        sutta = Sutta(
            uid=row.uid,
            acronym=acro[0],
            alt_acronym=acro[1] if len(acro) > 1 else None,
            name=row.name,
            vagga_number=vagga_number,
            lang=lang,
            subdivision=subdivision,
            vagga=vagga,
            number=number,
            number_in_vagga=row.number_in_vagga,
            volpage=volpage[0],
            alt_volpage_info=volpage[1] if len(volpage) > 1 else None,
            biblio_entry=biblio_entry,
            parallels=[],
            imm=self,
        )
        suttas.append((uid, sutta))

    # numsortkey: project helper — presumably orders uids numerically;
    # confirm against its definition.
    suttas = sorted(suttas, key=numsortkey)

    self.suttas = OrderedDict(suttas)
    # Populate subdivisions.suttas
    for sutta in self.suttas.values():
        sutta.subdivision.suttas.append(sutta)
        sutta.vagga.suttas.append(sutta)
def build_parallel_sutta_group(self, table_name):
    """ Generate a cleaned up form of the table data

    A parallel group is a different way of defining parallels, in
    essence it is a group of suttas (in the broader sense) from
    different traditions, all of which are the same 'thing', this is
    for example particularly relevant in the Patimokkha which is
    extremely similar across the traditions.

    All suttas within a sutta group share the same name (title) this
    is done mainly because many manuscripts lack titles (these being
    added by redactors). Also their uids are consistently derived
    from their division/subdivision uid.

    Some of this code is pretty messy but that can't really be helped
    because it's really the underlying logic that is pretty messy.
    """
    def normalize_uid(uid):
        # '#' and '*' are markup in the table's uid cells.
        return uid.replace('#', '-').replace('*', '')

    org_by_rule = list(table_reader(table_name))

    # The table is row-per-rule; rotate it so we can process one
    # division (column) at a time.
    by_column = []
    for i, column in enumerate(zip(*org_by_rule)):  # rotate
        if i == 0:
            # First column is the rule names — keep verbatim.
            by_column.append(column)
        else:
            # Each remaining column belongs to one division; its first
            # entry is the division uid.
            division_uid = column[0]
            try:
                division = self.divisions[division_uid]
            except KeyError:
                raise Exception('Bad column data `{}`'.format(column))
            # Shared placeholder objects for this division: '-'/empty
            # means "no parallel", '?' means "parallel uncertain".
            division_negated_parallel = NegatedParallel(
                division=division)
            division_maybe_parallel = MaybeParallel(
                division=division)
            new_column = []
            by_column.append(new_column)
            for j, uid in enumerate(column):
                if j <= 1:
                    # First two entries of each column are header data
                    # — kept as-is.
                    new_column.append(uid)
                else:
                    if not uid or uid == '-':
                        new_column.append(division_negated_parallel)
                    elif uid == '?':
                        new_column.append(division_maybe_parallel)
                    else:
                        # Resolve the cell to a sutta; try with a bare
                        # '*' strip first, then fully normalized.
                        try:
                            sutta = self.suttas[uid.rstrip('*')]
                        except KeyError:
                            sutta = self.suttas[normalize_uid(uid)]
                        new_column.append(sutta)
    # Rotate back: one row per rule again, now holding resolved objects.
    by_row = list(zip(*by_column))
    #self.by_column = by_column
    #self.by_row = by_row
    for row in by_row[2:]:
        # row[0] is the rule name, the rest are per-division entries.
        group = ParallelSuttaGroup(row[0], row[1:])
        for rule in row[1:]:
            if isinstance(rule, GroupedSutta):
                if hasattr(rule, 'parallel_group'):
                    # A sutta already in a group gets promoted to a
                    # multi-group container the first time a second
                    # group claims it.
                    if not isinstance(rule.parallel_group, MultiParallelSuttaGroup):
                        rule.parallel_group = MultiParallelSuttaGroup(rule.parallel_group)
                    rule.parallel_group.add_group(group)
                else:
                    rule.parallel_group = group