Exemplo n.º 1
0
def parse_creatures(pages: List[str]) -> Optional[List[object]]:
    """Parse creature stat-block pages into a list of plain dicts.

    First downloads the monster index from Archives of Nethys to build a
    ``family -> [creature names]`` mapping, then walks every non-empty entry
    of ``pages`` with BeautifulSoup and regexes, filling one ``Creature`` per
    page.

    Args:
        pages: HTML documents, one per creature. The 1-based position in this
            list is used as the creature id; empty strings are skipped so the
            ids of the remaining pages stay aligned with their positions.

    Returns:
        A list with one ``dataclasses.asdict(creature)`` dict per parsed page.
        (The annotation says ``Optional``, but every return path visible here
        returns the list.)

    Notes:
        * If the family table cannot be fetched, an error is printed and the
          process exits with status 1 via ``sys.exit``.
        * There is no per-page error handling: a page that lacks an expected
          tag, or whose text does not match one of the regexes below, raises
          (for example ``AttributeError`` on a ``None`` match).
        * Progress is printed to stdout for every parsed page.
    """
    # parse the families of creatures from http://2e.aonprd.com/Monsters.aspx?Letter=All
    try:
        with fetch.urlopen(
                'http://2e.aonprd.com/Monsters.aspx?Letter=All') as inf:
            fam_page = inf.read()
            if not fam_page:
                # An empty body is treated like any other fetch failure.
                raise ValueError('unable to fetch families table from AoN')
    except Exception:
        # Any failure (including the ValueError above) aborts the whole run.
        print('error fetching family table')
        sys.exit(1)

    fam_table: List[Tag] = BeautifulSoup(fam_page,
                                         'html.parser').find_all('tr')
    # fams maps a family label to the list of creature names in that family.
    fams = {}
    # The first <tr> is skipped (presumably the table header - confirm).
    for tr in fam_table[1:]:
        # Column 0 holds the creature name inside <a><u>...</u></a>;
        # column 1 holds the family label.
        name = tr.findChildren('td')[0].a.u.string
        fam = str(tr.findChildren('td')[1].string).strip()
        if fams.get(fam):
            fams.get(fam).append(name)
        else:
            fams[fam] = [name]

    creatures: List[object] = []
    for ind, page in enumerate(pages):
        if page == '':
            continue  # placeholder to properly enumerate "bad" array items
        print('parsing id={}\tof\t{}'.format(ind + 1, len(pages)))
        creature: Creature = Creature()
        # Everything below is scoped to the detailed-output <span> of the page.
        main_tag = BeautifulSoup(page, 'html.parser').find(
            'span', id='ctl00_MainContent_DetailedOutput')

        # id/name/level
        creature.id = ind + 1
        creature.name = str(main_tag.h1.string)
        # The level is the second whitespace-separated token of the
        # "Creature <n>" span text; it is stored as that string token.
        creature.level = main_tag.find(
            'span', text=re.compile('Creature -?[0-9]+')).text.split()[1]

        # source
        # Text of the <i> following the first external link after the
        # "Source" label; it is split on 'pg.' into book title and page number.
        source_tag = main_tag.find('b', text=re.compile('^Source$')).find_next(
            'a', class_='external-link').find_next('i').text
        src = [s.strip() for s in str(source_tag).split('pg.')]
        creature.source.book = src[0]
        creature.source.page = int(src[1])

        # HP TODO REWORK THE WHOLE SECTION
        hp_tag = main_tag
        hp_re = re.compile(
            r'\s*HP\s*(?P<hp>[0-9]+);?\s*(?P<hp_notes>[\w\d\s\-()\'’+.,]*);?\s*'
        )
        # Advance through the document until the <b>HP</b> label is reached.
        while hp_tag.next:
            if hp_tag.name and hp_tag.name == 'b' and hp_tag.text == 'HP':
                break
            hp_tag = hp_tag.next

        # Concatenate every text node from the HP label up to the next
        # <br>/<hr>; hp_tag is left on the element that stopped the scan.
        hp_str = ''
        while hp_tag.next:
            if hp_tag.name == 'br' or hp_tag.name == 'hr':
                # or (hp_tag.name == 'b' and hp_tag.string in ['Immunities', 'Resistances', 'Weaknesses']):
                break
            if type(hp_tag) == NavigableString:
                hp_str = ''.join((hp_str, hp_tag.string))
            hp_tag = hp_tag.next

        hp_match = re.match(hp_re, hp_str)
        creature.hitPoints = int(hp_match.groupdict().get('hp'))
        creature.hitPointsNotes = hp_match.groupdict().get('hp_notes')

        regen_re = re.compile(
            r'\s*[rR]egeneration (?P<regen>[0-9]+)\s*,?\s*\(?deactivated by\s*(?P<deactivated>[\w ]+)\)?\s*'
        )
        hardness_re = re.compile(r'\s*[hH]ardness (?P<hard>[0-9]+)')
        if creature.hitPointsNotes:
            # NOTE(review): hp_notes is captured *after* the "HP <n>" prefix by
            # hp_re, so this split on "HP <n>[,;]" may find no separator; in
            # that case [1:] is empty and the notes collapse to '' before the
            # regeneration/hardness matching below - confirm this is intended.
            creature.hitPointsNotes = ''.join(
                re.split(re.compile(r'\s*HP [0-9]+[,;]+\s*'),
                         creature.hitPointsNotes)[1:]).strip(' ;,')
            regen_match: Match = re.match(regen_re, creature.hitPointsNotes)
            if regen_match:
                creature.regeneration = int(regen_match.group('regen'))
                creature.deactivatedBy = regen_match.group('deactivated')
            hardness_match: Match = re.match(hardness_re,
                                             creature.hitPointsNotes)
            if hardness_match:
                # Hardness is stored separately and removed from the notes.
                creature.hardness = int(hardness_match.group('hard'))
                creature.hitPointsNotes = re.sub(hardness_re, '',
                                                 creature.hitPointsNotes)

        # Immunities; Weaknesses; Resistances
        # All three groups are optional, so this pattern also matches ''.
        imm_pattern = re.compile(
            r'\s*(Immunities\s*(?P<imm>[\w\d\s\-(),\']*);?)?\s*(Weaknesses\s*(?P<weak>[\w\d\s\-(),\']*);?)?\s*(Resistances\s*(?P<res>[\w\d\s\-(),\']*);?)?'
        )
        imm_str = ''
        # NOTE(review): hp_tag still points at whatever stopped the HP scan
        # above; if that was a <br>/<hr>, this loop breaks on its first
        # iteration and imm_str stays empty - confirm against real pages.
        while hp_tag.next:
            if hp_tag.name == 'br' or hp_tag.name == 'hr':
                break
            if type(hp_tag) == NavigableString:
                imm_str = ''.join([imm_str, hp_tag.string])
            hp_tag = hp_tag.next

        # Each captured group is a comma-separated list; a missing or empty
        # group yields [].
        imm_match = re.match(imm_pattern, imm_str)
        creature.immunities = [
            x.strip() for x in imm_match.group('imm').split(',')
        ] if imm_match.group('imm') else []
        creature.weaknesses = [
            x.strip() for x in imm_match.group('weak').split(',')
        ] if imm_match.group('weak') else []
        creature.resistances = [
            x.strip() for x in imm_match.group('res').split(',')
        ] if imm_match.group('res') else []

        # Traits
        trait_tag = main_tag
        # Rarity is picked up on the way to the alignment trait: the
        # 'traituncommon' class means uncommon; 'traitrare' means rare when
        # its link text is 'Rare' and unique otherwise.
        while trait_tag.next:  # scan through until traits section
            if trait_tag.name and (trait_tag.get('class') == ['traituncommon'
                                                              ]):
                creature.rarity = 'uncommon'
            if trait_tag.name and (trait_tag.get('class') == ['traitrare']):
                creature.rarity = 'rare' if trait_tag.a.string == 'Rare' else 'unique'
            if trait_tag.name and trait_tag.get('class') == ['traitalignment']:
                break
            trait_tag = trait_tag.next

        # From the alignment trait up to the next <br>/<hr>, collect alignment,
        # size and every plain 'trait' element (name plus its title attribute).
        while trait_tag.next:
            if trait_tag.name == 'br' or trait_tag.name == 'hr':
                break

            if trait_tag.name and trait_tag.get('class') == ['traitalignment']:
                creature.alignment = trait_tag.text
            if trait_tag.name and trait_tag.get('class') == ['traitsize']:
                creature.size = trait_tag.text
            if trait_tag.name and trait_tag.get('class') == ['trait']:
                t: Trait = Trait(name=trait_tag.text,
                                 description=trait_tag.get('title'))
                creature.traits.append(t)
            trait_tag = trait_tag.next

        # Perception and senses
        # The scan resumes from where the trait scan stopped.
        sense_tag = trait_tag
        sense_str = ''
        sense_re = re.compile(
            r'\s*Perception\s*(?P<per>[+-]?[0-9]+);?\s*(?P<per_notes>[\w\d\s\-()\'+.,]*)?\s*'
        )
        while sense_tag.next:
            if sense_tag.name == 'b' and sense_tag.string == 'Perception':
                break
            sense_tag = sense_tag.next
        # Gather the text of the Perception row up to the next <br>/<hr>.
        while sense_tag.next:
            if sense_tag.name == 'br' or sense_tag.name == 'hr':
                break
            elif type(sense_tag) == NavigableString:
                sense_str = ''.join((sense_str, sense_tag.string))
            sense_tag = sense_tag.next

        sense_match = re.match(sense_re, sense_str)
        creature.perception = int(sense_match.group('per'))
        # Senses are the comma-separated remainder after the modifier.
        creature.senses = [
            x.strip() for x in sense_match.group('per_notes').split(',')
        ]

        # languages
        # Optional row: parsed only when a "Languages" label exists after the
        # Perception row.
        language_tag = sense_tag.find_next('b', text='Languages')
        language_re = re.compile(
            r'\s*Languages\s*(?P<langs>[\w\d\s\-()\'+.,]*);?\s*(?P<comms>[\w\d\s\-()\'+.,]*)?\s*'
        )
        if language_tag:
            language_str = ''
            while language_tag.next:
                if language_tag.name == 'br' or language_tag.name == 'hr':
                    break
                elif type(language_tag) == NavigableString:
                    language_str = ''.join((language_str, language_tag.string))
                language_tag = language_tag.next

            # 'langs' is the part before the optional ';', 'comms' the part
            # after it; both are split on commas.
            language_match = re.match(language_re, language_str)
            creature.languages = [
                x.strip() for x in language_match.group('langs').split(',')
            ]
            creature.otherCommunication = [
                x.strip() for x in language_match.group('comms').split(',')
            ]

        # skills
        # Optional row: skills_re isolates the row text, skill_re then
        # extracts each "<name> <+/-mod> (<notes>)" entry from it.
        skill_tag = sense_tag.find_next('b', text='Skills')
        skills_re = re.compile(r'\s*Skills\s*(?P<skills>[\w\d\s\-()\'+.,]*)')
        skill_re = re.compile(
            r'(?P<name>[\w ]*)\s*(?P<mod>[+-]+[0-9]+)\s*(?P<notes>\([\w\d\s\-()\'+.,]*\))?\s*'
        )
        if skill_tag:
            skill_str = ''
            while skill_tag.next:
                if skill_tag.name == 'br' or skill_tag.name == 'hr':
                    break
                elif type(skill_tag) == NavigableString:
                    skill_str = ''.join((skill_str, skill_tag.string))
                skill_tag = skill_tag.next

            skill_match = re.match(skills_re, skill_str)
            for s in re.finditer(skill_re, skill_match.group('skills')):
                # Header is called positionally with (name, notes, modifier).
                skill = Header(s.group('name'), s.group('notes'),
                               int(s.group('mod')))
                creature.skills.append(skill)

        # ability mods
        # This scan restarts from the top of the stat block.
        abm_tag = main_tag
        abm_str = ''
        abm_re = re.compile(
            r'\s*Str\s*(?P<str>[+-][0-9]+),\s*Dex\s*(?P<dex>[+-][0-9]+),\s*Con\s*(?P<con>[+-][0-9]+),\s*Int\s*(?P<int>[+-][0-9]+),\s*Wis\s*(?P<wis>[+-][0-9]+),\s*Cha\s*(?P<cha>[+-][0-9]+)\s*'
        )
        while abm_tag.next:
            if abm_tag.name == 'b' and abm_tag.string == 'Str':
                break
            abm_tag = abm_tag.next

        while abm_tag.next:
            if abm_tag.name == 'br' or abm_tag.name == 'hr':
                break
            elif type(abm_tag) == NavigableString:
                abm_str = ''.join((abm_str, abm_tag.string))
            abm_tag = abm_tag.next

        # The six modifiers are stored in pattern order:
        # Str, Dex, Con, Int, Wis, Cha.
        abmods = re.match(abm_re, abm_str)
        creature.abilityMods = [int(x) for x in abmods.groups()]

        # items
        # (for some reason these are listed in the template as ABOVE interaction abilities, but are often NOT)
        # The search for the "Items" label starts where the ability-mod scan
        # stopped; abm_tag itself is not advanced by this section.
        item_tag = abm_tag
        item_str = ''
        item_re = re.compile(r'\s*(?P<item>[\w\d\s\-()\'+.,]+),?\s*')
        while item_tag.next:
            if item_tag.name == 'b' and item_tag.string == 'Items':
                break
            item_tag = item_tag.next

        while item_tag.next:
            if item_tag.name == 'br' or item_tag.name == 'hr':
                break
            elif type(item_tag) == NavigableString:
                item_str = ''.join((item_str, item_tag.string))
            item_tag = item_tag.next

        if item_str:
            # Drop the leading "Items" label before extracting entries.
            item_str = item_str.replace('Items', '', 1)
            item_matches = re.findall(item_re, item_str)
            creature.items = [x.strip() for x in item_matches if x.strip()]

        # interaction abilities
        # get_abilities is defined elsewhere; its second return value is
        # ignored here.
        creature.interactionAbilities, _ = get_abilities(abm_tag)

        # AC
        ac_tag = main_tag
        # Scan from the top until an AC value has been stored (the loop
        # condition stops once creature.ac is truthy).
        while ac_tag.next and not creature.ac:
            # NOTE(review): the guard inspects ac_tag.next but the value is
            # read from ac_tag.next_sibling - confirm these are always the
            # same node for the AC label.
            if ac_tag.name and ac_tag.name == 'b' and ac_tag.text == 'AC' and type(
                    ac_tag.next) == NavigableString:
                creature.ac = int(
                    re.match(re.compile(r'\s*(?P<ac>[0-9]+)[;,]?\s*'),
                             ac_tag.next_sibling.string).group('ac'))
            ac_tag = ac_tag.next

        # AC notes, iterate until saves
        # Every node without a tag name (i.e. text) before the "Fort" label
        # is appended to acNotes.
        while ac_tag.next:
            if ac_tag.name == 'b' and ac_tag.text == 'Fort':
                break
            elif not ac_tag.name:
                creature.acNotes = ''.join([creature.acNotes, ac_tag.string])
            ac_tag = ac_tag.next

        if creature.acNotes:
            # Remove any "AC <n>" prefix that was swept into the notes.
            creature.acNotes = re.sub(re.compile(r'\s*AC\s*[0-9]+\s*[;,]*'),
                                      '', creature.acNotes).strip()
        # iterate through the whole row until hr/br, then parse the resulting string for saves and notes
        save_pattern = re.compile(
            r'\s*Fort\s*(?P<fort>[+\-][0-9]+)\s*(?P<fort_notes>[\w\d\s\-()\'+.,]*),\s*'
            r'Ref\s*(?P<ref>[+\-][0-9]+)\s*(?P<ref_notes>[\w\d\s\-()\'+.,]*),\s*'
            r'Will\s*(?P<will>[+\-][0-9]+)\s*(?P<will_notes>[\w\d\s\-()\'+.,]*);?'
            r'(?P<save_notes>[\w\d\s\-()\'+.,]*)?\s*')
        saves_str = ''
        while ac_tag.next:
            if ac_tag.name == 'hr' or ac_tag.name == 'br':
                break
            if type(ac_tag) == NavigableString:
                saves_str = ''.join([saves_str, ac_tag.string])
            ac_tag = ac_tag.next

        saves_match = re.match(save_pattern, saves_str)

        creature.fortitude = int(saves_match.group('fort'))
        creature.fortitudeNotes = saves_match.group('fort_notes')
        creature.reflex = int(saves_match.group('ref'))
        creature.reflexNotes = saves_match.group('ref_notes')
        creature.will = int(saves_match.group('will'))
        creature.willNotes = saves_match.group('will_notes')
        creature.saveNotes = saves_match.group('save_notes')

        # automatic abilities
        # Called with the tag where the HP/immunities scan stopped.
        creature.automaticAbilities, _ = get_abilities(hp_tag)

        # speed
        speed_tag = hp_tag.next
        speed_str = ''
        while speed_tag.next:
            if speed_tag.name == 'b' and speed_tag.string == 'Speed':
                break
            speed_tag = speed_tag.next
        # Collect the row text up to the next <hr>/<br>, leaving out the
        # "Speed" label itself; the result is stored as one unparsed string.
        while speed_tag.next and speed_tag.name != 'hr' and speed_tag.name != 'br':
            if type(speed_tag) == NavigableString and speed_tag.string.strip(
            ) != 'Speed':
                speed_str = ''.join((speed_str, speed_tag.string))
            speed_tag = speed_tag.next
        creature.speed = speed_str

        # offensive/proactive abilities
        # These helpers are defined elsewhere; each returns its result together
        # with a tag that is passed on to the next helper.
        action_tag: Tag = speed_tag.next
        creature.strikes, action_tag = get_strikes(action_tag)
        creature, action_tag = get_spells(creature, action_tag)
        creature.activeAbilities, action_tag = get_abilities(action_tag)
        creature.sidebars, action_tag = get_sidebars(action_tag)

        # set family
        # Use the first family whose name list contains this creature's name;
        # fall back to an em dash when none does.
        creature.family = [k for (k, v) in fams.items() if creature.name in v]
        creature.family = creature.family[0] if creature.family else '—'

        # NAVIGABLE STRINGS CAUSE RECURSION MAX DEPTH EXCEPTIONS. Convert to str before setting fields
        creatures.append(dataclasses.asdict(creature))

    return creatures
Exemplo n.º 2
0
 def _import_trait(self, suite, dirname):
     """Build a ``Trait`` for ``suite`` and call ``insert_trait`` on it.

     The ``Trait`` is constructed from ``self.conn`` and ``suite``;
     ``dirname`` and ``suite`` are forwarded unchanged to ``insert_trait``.
     """
     Trait(self.conn, suite).insert_trait(dirname, suite)
Exemplo n.º 3
0
 def _insert_trait_(self, trait, suite):
     """Insert ``trait`` for ``suite`` using its path under ``self.main_path``.

     The path handed to ``insert_trait`` is
     ``join(self.main_path, suite, trait)``.
     """
     handle = Trait(self.conn, suite)
     #path = join(self.main_path, suite, trait + '.tar')
     trait_path = join(self.main_path, suite, trait)
     handle.insert_trait(trait_path, suite)
Exemplo n.º 4
0
 def traits(self):
     """Return a one-element list with a ``Trait`` built from "name" and ``self.name``."""
     name_trait = Trait("name", self.name)
     return [name_trait]
Exemplo n.º 5
0
 def defaultTraits():
     """Return a one-element list holding ``Trait("name")``."""
     name_trait = Trait("name")
     return [name_trait]
Exemplo n.º 6
0
 def defaultTraits():
     """Return the list ``[Trait("id"), Trait("shelter_id")]``, in that order."""
     field_names = ("id", "shelter_id")
     return [Trait(field) for field in field_names]