def parse_creatures(pages: List[str]) -> Optional[List[object]]:
    """Parse raw creature HTML pages into a list of plain dicts.

    The function first downloads the creature/family table from Archives of
    Nethys and builds a ``family -> [creature names]`` map. It then walks each
    non-empty page, collecting the text between markup tags and filling a
    ``Creature`` from regex matches over that text.

    Args:
        pages: HTML source, one creature page per element. Empty strings are
            skipped but still consume an index, so ``creature.id`` is always
            ``index + 1``.

    Returns:
        One ``dataclasses.asdict(creature)`` result per non-empty page, in
        input order.

    Side effects:
        Performs a network fetch, prints progress to stdout, and calls
        ``sys.exit(1)`` when the family table cannot be fetched.

    Note:
        Most ``re.match`` results and tag lookups are used without a ``None``
        check, so a page that does not follow the expected layout raises
        (e.g. ``AttributeError`` on a failed match) instead of being skipped.
    """
    # Fetch the family table from
    # http://2e.aonprd.com/Monsters.aspx?Letter=All
    try:
        with fetch.urlopen(
                'http://2e.aonprd.com/Monsters.aspx?Letter=All') as inf:
            fam_page = inf.read()
        if not fam_page:
            raise ValueError('unable to fetch families table from AoN')
    except Exception:
        # Any failure here (including the ValueError above) aborts the run.
        print('error fetching family table')
        sys.exit(1)
    fam_table: List[Tag] = BeautifulSoup(fam_page,
                                         'html.parser').find_all('tr')
    # Map family name -> list of creature names; the first row is skipped
    # (presumably the table header -- TODO confirm).
    fams = {}
    for tr in fam_table[1:]:
        name = tr.findChildren('td')[0].a.u.string
        fam = str(tr.findChildren('td')[1].string).strip()
        if fams.get(fam):
            fams.get(fam).append(name)
        else:
            fams[fam] = [name]
    creatures: List[object] = []
    for ind, page in enumerate(pages):
        if page == '':
            continue  # placeholder to properly enumerate "bad" array items
        print('parsing id={}\tof\t{}'.format(ind + 1, len(pages)))
        creature: Creature = Creature()
        main_tag = BeautifulSoup(page, 'html.parser').find(
            'span', id='ctl00_MainContent_DetailedOutput')
        # id/name/level
        creature.id = ind + 1
        creature.name = str(main_tag.h1.string)
        # The level is kept as the string token that follows "Creature".
        creature.level = main_tag.find(
            'span', text=re.compile('Creature -?[0-9]+')).text.split()[1]
        # source: "<book> pg. <page>" taken from the <i> after the Source link
        source_tag = main_tag.find('b', text=re.compile('^Source$')).find_next(
            'a', class_='external-link').find_next('i').text
        src = [s.strip() for s in str(source_tag).split('pg.')]
        creature.source.book = src[0]
        creature.source.page = int(src[1])
        # HP TODO REWORK THE WHOLE SECTION
        hp_tag = main_tag
        hp_re = re.compile(
            r'\s*HP\s*(?P<hp>[0-9]+);?\s*(?P<hp_notes>[\w\d\s\-()\'’+.,]*);?\s*'
        )
        # Advance the cursor to the <b>HP</b> tag.
        while hp_tag.next:
            if hp_tag.name and hp_tag.name == 'b' and hp_tag.text == 'HP':
                break
            hp_tag = hp_tag.next
        # Concatenate every text node from there up to the next <br>/<hr>.
        hp_str = ''
        while hp_tag.next:
            if hp_tag.name == 'br' or hp_tag.name == 'hr':
                # Previously considered as an extra stop condition:
                # or (hp_tag.name == 'b' and hp_tag.string in ['Immunities', 'Resistances', 'Weaknesses']):
                break
            if type(hp_tag) == NavigableString:
                hp_str = ''.join((hp_str, hp_tag.string))
            hp_tag = hp_tag.next
        hp_match = re.match(hp_re, hp_str)
        creature.hitPoints = int(hp_match.groupdict().get('hp'))
        creature.hitPointsNotes = hp_match.groupdict().get('hp_notes')
        regen_re = re.compile(
            r'\s*[rR]egeneration (?P<regen>[0-9]+)\s*,?\s*\(?deactivated by\s*(?P<deactivated>[\w ]+)\)?\s*'
        )
        hardness_re = re.compile(r'\s*[hH]ardness (?P<hard>[0-9]+)')
        if creature.hitPointsNotes:
            # NOTE(review): hp_re has already consumed the leading "HP <n>",
            # so this split pattern normally finds nothing; the [1:] slice is
            # then empty and the notes collapse to ''. Confirm this is
            # intended -- it also means the regeneration/hardness matches
            # below run against an empty string.
            creature.hitPointsNotes = ''.join(
                re.split(re.compile(r'\s*HP [0-9]+[,;]+\s*'),
                         creature.hitPointsNotes)[1:]).strip(' ;,')
            regen_match: Match = re.match(regen_re, creature.hitPointsNotes)
            if regen_match:
                creature.regeneration = int(regen_match.group('regen'))
                creature.deactivatedBy = regen_match.group('deactivated')
            hardness_match: Match = re.match(hardness_re,
                                             creature.hitPointsNotes)
            if hardness_match:
                creature.hardness = int(hardness_match.group('hard'))
                # Drop the hardness clause from the free-text notes.
                creature.hitPointsNotes = re.sub(hardness_re, '',
                                                 creature.hitPointsNotes)
        # Immunities; Weaknesses; Resistances
        imm_pattern = re.compile(
            r'\s*(Immunities\s*(?P<imm>[\w\d\s\-(),\']*);?)?\s*(Weaknesses\s*(?P<weak>[\w\d\s\-(),\']*);?)?\s*(Resistances\s*(?P<res>[\w\d\s\-(),\']*);?)?'
        )
        imm_str = ''
        # NOTE(review): hp_tag is still the <br>/<hr> that ended the HP scan
        # above (or the last node), so this loop appears to exit on its first
        # check and imm_str stays empty, leaving all three lists below as [].
        # Confirm whether the cursor should be advanced or hp_str re-used.
        while hp_tag.next:
            if hp_tag.name == 'br' or hp_tag.name == 'hr':
                break
            if type(hp_tag) == NavigableString:
                imm_str = ''.join([imm_str, hp_tag.string])
            hp_tag = hp_tag.next
        imm_match = re.match(imm_pattern, imm_str)
        # Every group is optional; a missing group yields an empty list.
        creature.immunities = [
            x.strip() for x in imm_match.group('imm').split(',')
        ] if imm_match.group('imm') else []
        creature.weaknesses = [
            x.strip() for x in imm_match.group('weak').split(',')
        ] if imm_match.group('weak') else []
        creature.resistances = [
            x.strip() for x in imm_match.group('res').split(',')
        ] if imm_match.group('res') else []
        # Traits
        trait_tag = main_tag
        while trait_tag.next:  # scan through until traits section
            # Rarity markers are picked up on the way to the alignment trait.
            if trait_tag.name and (trait_tag.get('class') == ['traituncommon'
                                                              ]):
                creature.rarity = 'uncommon'
            if trait_tag.name and (trait_tag.get('class') == ['traitrare']):
                # The 'traitrare' class covers two cases, told apart by the
                # link text.
                creature.rarity = 'rare' if trait_tag.a.string == 'Rare' else 'unique'
            if trait_tag.name and trait_tag.get('class') == ['traitalignment']:
                break
            trait_tag = trait_tag.next
        # From the alignment trait to the next <br>/<hr>: alignment, size and
        # the remaining generic traits.
        while trait_tag.next:
            if trait_tag.name == 'br' or trait_tag.name == 'hr':
                break
            if trait_tag.name and trait_tag.get('class') == ['traitalignment']:
                creature.alignment = trait_tag.text
            if trait_tag.name and trait_tag.get('class') == ['traitsize']:
                creature.size = trait_tag.text
            if trait_tag.name and trait_tag.get('class') == ['trait']:
                t: Trait = Trait(name=trait_tag.text,
                                 description=trait_tag.get('title'))
                creature.traits.append(t)
            trait_tag = trait_tag.next
        # Perception and senses (the scan resumes where the traits ended)
        sense_tag = trait_tag
        sense_str = ''
        sense_re = re.compile(
            r'\s*Perception\s*(?P<per>[+-]?[0-9]+);?\s*(?P<per_notes>[\w\d\s\-()\'+.,]*)?\s*'
        )
        while sense_tag.next:
            if sense_tag.name == 'b' and sense_tag.string == 'Perception':
                break
            sense_tag = sense_tag.next
        while sense_tag.next:
            if sense_tag.name == 'br' or sense_tag.name == 'hr':
                break
            elif type(sense_tag) == NavigableString:
                sense_str = ''.join((sense_str, sense_tag.string))
            sense_tag = sense_tag.next
        sense_match = re.match(sense_re, sense_str)
        creature.perception = int(sense_match.group('per'))
        # With no notes this yields [''] rather than an empty list.
        creature.senses = [
            x.strip() for x in sense_match.group('per_notes').split(',')
        ]
        # languages (optional row, looked up after the perception row)
        language_tag = sense_tag.find_next('b', text='Languages')
        language_re = re.compile(
            r'\s*Languages\s*(?P<langs>[\w\d\s\-()\'+.,]*);?\s*(?P<comms>[\w\d\s\-()\'+.,]*)?\s*'
        )
        if language_tag:
            language_str = ''
            while language_tag.next:
                if language_tag.name == 'br' or language_tag.name == 'hr':
                    break
                elif type(language_tag) == NavigableString:
                    language_str = ''.join((language_str, language_tag.string))
                language_tag = language_tag.next
            language_match = re.match(language_re, language_str)
            # Text before the optional ';' is the language list; text after
            # it is stored as other communication.
            creature.languages = [
                x.strip() for x in language_match.group('langs').split(',')
            ]
            creature.otherCommunication = [
                x.strip() for x in language_match.group('comms').split(',')
            ]
        # skills (optional row, looked up after the perception row)
        skill_tag = sense_tag.find_next('b', text='Skills')
        skills_re = re.compile(r'\s*Skills\s*(?P<skills>[\w\d\s\-()\'+.,]*)')
        skill_re = re.compile(
            r'(?P<name>[\w ]*)\s*(?P<mod>[+-]+[0-9]+)\s*(?P<notes>\([\w\d\s\-()\'+.,]*\))?\s*'
        )
        if skill_tag:
            skill_str = ''
            while skill_tag.next:
                if skill_tag.name == 'br' or skill_tag.name == 'hr':
                    break
                elif type(skill_tag) == NavigableString:
                    skill_str = ''.join((skill_str, skill_tag.string))
                skill_tag = skill_tag.next
            skill_match = re.match(skills_re, skill_str)
            # One Header per "<name> <+/-mod> (<notes>)" entry in the row.
            for s in re.finditer(skill_re, skill_match.group('skills')):
                skill = Header(s.group('name'), s.group('notes'),
                               int(s.group('mod')))
                creature.skills.append(skill)
        # ability mods (the scan restarts from the top of the page)
        abm_tag = main_tag
        abm_str = ''
        abm_re = re.compile(
            r'\s*Str\s*(?P<str>[+-][0-9]+),\s*Dex\s*(?P<dex>[+-][0-9]+),\s*Con\s*(?P<con>[+-][0-9]+),\s*Int\s*(?P<int>[+-][0-9]+),\s*Wis\s*(?P<wis>[+-][0-9]+),\s*Cha\s*(?P<cha>[+-][0-9]+)\s*'
        )
        while abm_tag.next:
            if abm_tag.name == 'b' and abm_tag.string == 'Str':
                break
            abm_tag = abm_tag.next
        while abm_tag.next:
            if abm_tag.name == 'br' or abm_tag.name == 'hr':
                break
            elif type(abm_tag) == NavigableString:
                abm_str = ''.join((abm_str, abm_tag.string))
            abm_tag = abm_tag.next
        abmods = re.match(abm_re, abm_str)
        # Groups come back in pattern order: Str, Dex, Con, Int, Wis, Cha.
        creature.abilityMods = [int(x) for x in abmods.groups()]
        # items
        # (for some reason these are listed in the template as ABOVE
        # interaction abilities, but are often NOT)
        item_tag = abm_tag
        item_str = ''
        item_re = re.compile(r'\s*(?P<item>[\w\d\s\-()\'+.,]+),?\s*')
        while item_tag.next:
            if item_tag.name == 'b' and item_tag.string == 'Items':
                break
            item_tag = item_tag.next
        while item_tag.next:
            if item_tag.name == 'br' or item_tag.name == 'hr':
                break
            elif type(item_tag) == NavigableString:
                item_str = ''.join((item_str, item_tag.string))
            item_tag = item_tag.next
        if item_str:
            # Strip the "Items" label itself before splitting into entries.
            item_str = item_str.replace('Items', '', 1)
            item_matches = re.findall(item_re, item_str)
            creature.items = [x.strip() for x in item_matches if x.strip()]
        # interaction abilities
        # get_abilities is defined elsewhere; it is handed the cursor left at
        # the end of the ability-mod row and returns a 2-tuple whose second
        # element is ignored here.
        creature.interactionAbilities, _ = get_abilities(abm_tag)
        # AC (the scan restarts from the top and stops once creature.ac is
        # truthy)
        ac_tag = main_tag
        while ac_tag.next and not creature.ac:
            if ac_tag.name and ac_tag.name == 'b' and ac_tag.text == 'AC' and type(
                    ac_tag.next) == NavigableString:
                creature.ac = int(
                    re.match(re.compile(r'\s*(?P<ac>[0-9]+)[;,]?\s*'),
                             ac_tag.next_sibling.string).group('ac'))
            ac_tag = ac_tag.next
        # AC notes, iterate until saves
        while ac_tag.next:
            if ac_tag.name == 'b' and ac_tag.text == 'Fort':
                break
            elif not ac_tag.name:
                # Nodes without a tag name are treated as text and appended.
                creature.acNotes = ''.join([creature.acNotes, ac_tag.string])
            ac_tag = ac_tag.next
        if creature.acNotes:
            # Remove the "AC <n>;" prefix that the scan above also collected.
            creature.acNotes = re.sub(re.compile(r'\s*AC\s*[0-9]+\s*[;,]*'),
                                      '', creature.acNotes).strip()
        # iterate through the whole row until hr/br, then parse the resulting
        # string for saves and notes
        save_pattern = re.compile(
            r'\s*Fort\s*(?P<fort>[+\-][0-9]+)\s*(?P<fort_notes>[\w\d\s\-()\'+.,]*),\s*'
            r'Ref\s*(?P<ref>[+\-][0-9]+)\s*(?P<ref_notes>[\w\d\s\-()\'+.,]*),\s*'
            r'Will\s*(?P<will>[+\-][0-9]+)\s*(?P<will_notes>[\w\d\s\-()\'+.,]*);?'
            r'(?P<save_notes>[\w\d\s\-()\'+.,]*)?\s*')
        saves_str = ''
        while ac_tag.next:
            if ac_tag.name == 'hr' or ac_tag.name == 'br':
                break
            if type(ac_tag) == NavigableString:
                saves_str = ''.join([saves_str, ac_tag.string])
            ac_tag = ac_tag.next
        saves_match = re.match(save_pattern, saves_str)
        creature.fortitude = int(saves_match.group('fort'))
        creature.fortitudeNotes = saves_match.group('fort_notes')
        creature.reflex = int(saves_match.group('ref'))
        creature.reflexNotes = saves_match.group('ref_notes')
        creature.will = int(saves_match.group('will'))
        creature.willNotes = saves_match.group('will_notes')
        creature.saveNotes = saves_match.group('save_notes')
        # automatic abilities (parsed from the cursor left after the HP row)
        creature.automaticAbilities, _ = get_abilities(hp_tag)
        # speed
        speed_tag = hp_tag.next
        speed_str = ''
        while speed_tag.next:
            if speed_tag.name == 'b' and speed_tag.string == 'Speed':
                break
            speed_tag = speed_tag.next
        while speed_tag.next and speed_tag.name != 'hr' and speed_tag.name != 'br':
            # Keep every text node except the "Speed" label itself.
            if type(speed_tag) == NavigableString and speed_tag.string.strip(
            ) != 'Speed':
                speed_str = ''.join((speed_str, speed_tag.string))
            speed_tag = speed_tag.next
        creature.speed = speed_str
        # offensive/proactive abilities
        # Each helper (defined elsewhere) returns its result together with
        # the cursor that is passed to the next helper.
        action_tag: Tag = speed_tag.next
        creature.strikes, action_tag = get_strikes(action_tag)
        creature, action_tag = get_spells(creature, action_tag)
        creature.activeAbilities, action_tag = get_abilities(action_tag)
        creature.sidebars, action_tag = get_sidebars(action_tag)
        # set family: first family whose name list contains this creature,
        # or an em dash when none does
        creature.family = [k for (k, v) in fams.items() if creature.name in v]
        creature.family = creature.family[0] if creature.family else '—'
        # NAVIGABLE STRINGS CAUSE RECURSION MAX DEPTH EXCEPTIONS.
        # Convert to str before setting fields
        creatures.append(dataclasses.asdict(creature))
    return creatures
def _import_trait(self, suite, dirname):
    """Insert the trait located at ``dirname`` for ``suite``.

    Creates a ``Trait`` from ``self.conn`` and ``suite`` and calls
    ``insert_trait(dirname, suite)`` on it. Returns ``None``.
    """
    Trait(self.conn, suite).insert_trait(dirname, suite)
def _insert_trait_(self, trait, suite):
    """Insert ``trait`` of ``suite`` from its location under ``self.main_path``.

    The location is ``join(self.main_path, suite, trait)``; it is handed to
    ``Trait.insert_trait`` together with ``suite``. Returns ``None``.
    """
    db = Trait(self.conn, suite)
    # The path carries no extension. An earlier, commented-out variant
    # appended '.tar' to the trait name -- presumably traits used to be
    # tarballs; verify before restoring it.
    trait_path = join(self.main_path, suite, trait)
    db.insert_trait(trait_path, suite)
def traits(self):
    """Return a one-element list with a ``Trait`` built from "name" and ``self.name``."""
    name_trait = Trait("name", self.name)
    return [name_trait]
def defaultTraits():
    """Return the default trait list: a single ``Trait("name")``."""
    return [Trait(key) for key in ("name",)]
def defaultTraits():
    """Return the default trait list: ``Trait("id")`` then ``Trait("shelter_id")``."""
    return [Trait(key) for key in ("id", "shelter_id")]