def trait_parse(span):
    """Convert a trait ``<span>`` element into a trait section dict.

    The name is the span's ``alt`` attribute with " Trait" removed, the text
    is its stripped ``title`` attribute, and the single entry of ``classes``
    is derived from its ``class`` attribute: any class other than plain
    ``trait`` has the substring ``trait`` removed, while a plain ``trait``
    becomes ``creature_type`` when the name is a known creature type.

    If the span's only child is an ``<a>``, the link returned by
    ``extract_link`` is stored under ``link``.

    Raises:
        Exception: if the span does not have exactly one child.
    """
    creature_types = (
        "Aberration", "Animal", "Astral", "Beast", "Celestial",
        "Construct", "Dragon", "Dream", "Elemental", "Ethereal",
        "Fey", "Fiend", "Fungus", "Giant", "Humanoid",
        "Monitor", "Ooze", "Petitioner", "Plant", "Undead",
    )

    trait_name = ''.join(span['alt']).replace(" Trait", "")
    css_class = ''.join(span['class'])
    if css_class != 'trait':
        css_class = css_class.replace('trait', '')
    if css_class == 'trait' and trait_name in creature_types:
        css_class = "creature_type"

    trait = {
        'name': trait_name,
        'classes': [css_class],
        'text': ''.join(span['title']).strip(),
        'type': 'stat_block_section',
        'subtype': 'trait',
    }

    children = list(span.children)
    if len(children) != 1:
        raise Exception("You should not be able to get here")
    only_child = children[0]
    if only_child.name == "a":
        _, link = extract_link(only_child)
        trait['link'] = link
    return trait
def process_skills(section):
    """Parse the "Skills" stat block line into a list of skill dicts.

    ``section`` is indexed as ``(key, html, link)``: the key must be
    "Skills" and the link must be None. Each comma-separated part is expected
    to start with an ``<a>`` naming the skill, followed by its bonus and an
    optional modifier.

    Returns:
        A list of dicts with ``type``, ``subtype``, ``name``, ``link`` and
        integer ``value`` keys, plus ``modifiers`` when a modifier is present.
    """
    assert section[0] == "Skills"
    assert section[2] is None

    skills = []
    for chunk in rebuilt_split_modifiers(split_stat_block_line(section[1])):
        nodes = list(BeautifulSoup(chunk, 'html.parser').children)
        # The leading node is the skill's link; everything after it is the
        # bonus text (and possibly a modifier).
        anchor = nodes.pop(0)
        name, link = extract_link(anchor)
        remainder = ''.join([str(node) for node in nodes])
        bonus_text, modifier = extract_modifier(remainder)
        skill = {
            'type': 'stat_block_section',
            'subtype': 'skill',
            'name': name,
            'link': link,
            'value': int(bonus_text.replace("+", "")),
        }
        if modifier:
            #TODO: fix []
            modifier_objects = build_objects(
                'stat_block_section', 'modifier', [modifier])
            skill['modifiers'] = link_modifiers(modifier_objects)
        skills.append(skill)
    return skills
def process_items(section):
    """Parse the "Items" stat block line into a list of item dicts.

    ``section`` is indexed as ``(key, html, link)``: the key must be "Items"
    and the link must be None.

    Returns:
        A list of dicts with ``type``, ``subtype`` and stripped ``name``,
        plus ``modifiers`` (the modifier text split on commas) and ``links``
        (one per ``<a>`` in the item) when present.
    """
    assert section[0] == "Items"
    assert section[2] is None

    items = []
    for chunk in rebuilt_split_modifiers(split_stat_block_line(section[1])):
        item_html, modifier = extract_modifier(chunk)
        soup = unwrap_formatting(BeautifulSoup(item_html, 'html.parser'))
        item = {
            'type': 'stat_block_section',
            'subtype': 'item',
            'name': get_text(soup).strip(),
        }
        if modifier:
            modifier_objects = build_objects(
                'stat_block_section', 'modifier', modifier.split(","))
            item['modifiers'] = link_modifiers(modifier_objects)

        # Consume anchors one at a time: each is recorded and then unwrapped
        # so that the next lookup finds the following anchor.
        links = []
        anchor = soup.a
        while anchor:
            _, link = extract_link(anchor)
            links.append(link)
            anchor.unwrap()
            anchor = soup.a
        if links:
            item['links'] = links
        items.append(item)
    return items
def _extract_trait(description): traits = [] newdescription = [] if description.find("(") > -1: front, middle = description.split("(", 1) newdescription.append(front) s = middle.split(")", 1) assert len(s) == 2, s text, back = s bs = BeautifulSoup(text, 'html.parser') if bs.a and bs.a.has_attr('game-obj') and bs.a['game-obj'] == 'Traits': if text.find(" or ") > -1: # TODO - need to solve or cases on traits, Monster ID 518 return description, [] parts = [p.strip() for p in text.replace("(", "").split(",")] for part in parts: bs = BeautifulSoup(part, 'html.parser') children = list(bs.children) assert len(children) == 1, part name, trait_link = extract_link(children[0]) traits.append(build_object( 'stat_block_section', 'trait', name.strip(), {'link': trait_link})) else: newdescription.append(text) newdescription.append(")") description = back newdescription.append(description) return ''.join(newdescription).strip(), traits
def parse_spell(html):
    """Parse one spell entry from a spell list into a spell dict.

    The result always has ``type``, ``subtype``, ``links`` (one per ``<a>``
    in the entry) and ``name``. When the entry has no parenthetical,
    ``count`` is 1. When it has one, the parenthetical text (without the
    closing parenthesis) is stored as ``count_text`` and a ``;``- or
    ``,``-separated ``xN`` token inside it sets ``count``.

    Raises:
        AssertionError: if the text has more than one "(" or more than one
            ``xN`` token.
    """
    soup = BeautifulSoup(html, 'html.parser')
    spell = {
        "type": "stat_block_section",
        "subtype": "spell",
        'links': [extract_link(anchor)[1] for anchor in soup.find_all("a")],
    }

    text = get_text(soup)
    if "(" not in text:
        spell['name'] = text
        spell['count'] = 1
        return spell

    pieces = [piece.strip() for piece in text.split("(")]
    assert len(pieces) == 2, "Failed to parse spell: %s" % (html)
    name, raw_count = pieces
    spell['name'] = name
    count_text = raw_count.replace(")", "")
    spell["count_text"] = count_text

    count = None
    for separator in (";", ","):
        # Tokens that are not a count are re-joined so that the next
        # separator pass does not see an already-consumed count again.
        remainder = []
        for chunk in count_text.split(separator):
            candidate = chunk.strip()
            if re.match(r"^x\d*$", candidate):
                assert count is None, "Failed to parse spell: %s" % (html)
                count = int(candidate[1:])
            else:
                remainder.append(chunk)
        count_text = separator.join(remainder)
    if count:
        spell["count"] = count
    return spell
def process_source(sb, section):
    """Record the creature's image and source books on the stat block.

    ``section`` is indexed as ``(key, html, ...)`` and its key must be
    "Source". If the first node of the HTML contains an ``<img>``, that node's
    ``href`` is stored as ``sb['image']``. Every following ``<a>`` is turned
    into a source via ``extract_source``; a ``<sup>`` supplies an errata link
    and a ``<strong>`` supplies a note link, both of which are copied onto
    every source. The result is stored in ``sb['sources']``.

    Raises:
        AssertionError: on a wrong key, or more than one errata or note.
        Exception: if any other node or text is encountered.
    """
    # Sample input, by monster ID:
    # 5
    # <a target="_blank" href="Images\Monsters\Alghollthu_VeiledMaster.png"><img class="thumbnail" src="Images\Monsters\Alghollthu_VeiledMaster.png"></a>
    # <b>Source</b> <a href="https://paizo.com/products/btq01y0m?Pathfinder-Bestiary" target="_blank" class="external-link"><i>Bestiary pg. 14</i></a>
    # 447
    # <a target="_blank" href="Images\Monsters\Grippli.png"><img class="thumbnail" src="Images\Monsters\Grippli.png"></a>
    # <b>Source</b> <a href="https://paizo.com/products/btq01znt?Pathfinder-Adventure-Path-146-Cult-of-Cinders" target="_blank" class="external-link"><i>Pathfinder #146: Cult of Cinders pg. 86</i></a>, <a href="https://paizo.com/products/btq022yq" target="_blank"><i>Bestiary 2 pg. 139</i></a>; <strong><u><a href="Monsters.aspx?ID=693">There is a more recent version of this monster. Click here to view.</a></u></strong>
    # 861
    # <a target="_blank" href="Images\Monsters\Witchwyrd.png"><img class="thumbnail" src="Images\Monsters\Witchwyrd.png"></a>
    # <b>Source</b> <a href="https://paizo.com/products/btq022yq" target="_blank" class="external-link"><i>Bestiary 2 pg. 294</i></a>, <a href="https://paizo.com/products/btq02065" target="_blank"><i>Pathfinder #149: Against the Scarlet Triad pg. 90</i></a>
    def set_image(obj, name):
        # Keep only what follows the last backslash, whether it appears
        # literally or URL-encoded as %5C.
        href = obj['href']
        filename = href.split("\\").pop().split("%5C").pop()
        sb['image'] = {
            'type': 'image',
            'name': name,
            'game-obj': 'Monster',
            'image': filename,
        }

    assert section[0] == "Source"
    nodes = list(BeautifulSoup(section[1], 'html.parser').children)
    if nodes[0].find("img"):
        set_image(nodes.pop(0), sb['name'])

    sources = []
    note = None
    errata = None
    for node in nodes:
        if node.name == "a":
            sources.append(extract_source(node))
        elif node.name == "sup":
            assert not errata, "Should be no more than one errata."
            errata = extract_link(node.find("a"))
        elif node.name == "strong":
            assert not note, "Should be no more than one note."
            note = extract_link(node.find("a"))
        elif isinstance(node, str) and node.strip() in [",", ";"]:
            # Separator between sources; nothing to record.
            continue
        elif node.name == "br":
            continue
        else:
            raise Exception("Source has unexpected text: %s" % node)

    # The note and errata apply to the line as a whole, so every source
    # receives them.
    for source in sources:
        if note:
            source['note'] = note[1]
        if errata:
            source['errata'] = errata[1]
    sb['sources'] = sources
def remove_html_weapon(text, section):
    """Strip markup from a weapon name, collecting its links.

    A leading ``<i>`` is unwrapped, then every ``<a>`` is unwrapped after its
    link (from ``extract_link``) has been appended to ``section['links']``,
    which is created on first use.

    Returns:
        The remaining markup as a string.
    """
    soup = BeautifulSoup(text, 'html.parser')
    first_node = list(soup.children)[0]
    if first_node.name == "i":
        soup.i.unwrap()

    anchor = soup.a
    while anchor:
        _, link = extract_link(anchor)
        section.setdefault("links", []).append(link)
        anchor.unwrap()
        anchor = soup.a
    return str(soup)
def creature_stat_block_pass(struct):
    """Split a creature's stat block HTML into sections and process them.

    The stat block found by ``find_stat_block`` is walked node by node.
    Trait spans are parsed onto ``sb['traits']``. A ``<b>`` opens a new
    ``(key, value, link)`` entry, a ``<br>`` closes the open entry, and an
    ``<hr>`` closes the current section. The resulting three sections are
    handed to ``process_stat_block``.

    Raises:
        AssertionError: if key-less trailing content is found while a link is
            pending, or if the stat block does not split into exactly three
            sections.
    """
    def add_to_data(key, value, data, link):
        # Flush the open entry, if any, and reset the accumulator state.
        if key:
            data.append(
                (key, ''.join([str(v) for v in value]).strip(), link))
            key = None
            value = []
            link = None
        return key, value, data, link

    def add_remnants(value, data):
        # Fold key-less trailing content into the most recent entry. The
        # entry keeps its own link; previously the popped link was discarded
        # and replaced by the enclosing scope's `link`, which is always None
        # at this point, silently dropping the entry's link.
        last_key, last_value, last_link = data.pop()
        merged = [last_value]
        merged.extend(value)
        data.append(
            (last_key, ''.join([str(v) for v in merged]).strip(), last_link))
        return [], data

    sb = find_stat_block(struct)
    bs = BeautifulSoup(sb["text"], 'html.parser')
    objs = list(bs.children)
    sections = []
    data = []
    key = None
    value = []
    link = None
    for obj in objs:
        if obj.name == 'span' and is_trait(obj):
            trait = trait_parse(obj)
            sb.setdefault('traits', []).append(trait)
        elif obj.name == "br":
            value.append(obj)
            key, value, data, link = add_to_data(key, value, data, link)
        elif obj.name == 'hr':
            key, value, data, link = add_to_data(key, value, data, link)
            if len(value) > 0:
                # Content without a key of its own belongs to the entry
                # that precedes it.
                assert link is None
                value, data = add_remnants(value, data)
            data = strip_br(data)
            sections.append(data)
            data = []
        elif obj.name == "b":
            key, value, data, link = add_to_data(key, value, data, link)
            key = get_text(obj)
            if obj.a:
                _, link = extract_link(obj.a)
        else:
            value.append(obj)
    if key:
        key, value, data, link = add_to_data(key, value, data, link)
    data = strip_br(data)
    sections.append(data)
    assert len(sections) == 3, sections
    process_stat_block(sb, sections)
def parse_attack_effect(parts):
    """Build an attack effect dict from the words of a non-damage clause.

    The words are joined with spaces and parsed as HTML. The plain text is
    stored (stripped) under ``effect`` and, if the clause contains any
    ``<a>`` elements, their links are stored under ``links``.
    """
    soup = BeautifulSoup(' '.join(parts), 'html.parser')
    effect = {
        "type": "stat_block_section",
        "subtype": "attack_damage",
    }
    links = [extract_link(anchor)[1] for anchor in soup.find_all("a")]
    if links:
        effect["links"] = links
    effect["effect"] = get_text(soup).strip()
    return effect
def parse_attack_action(parent_section, attack_type):
    """Parse an attack line and attach it to its parent section.

    The parent's ``text`` is consumed and parsed as
    ``weapon bonus [multiple attack bonuses] (traits), Damage ...``, where the
    trait list is optional. The parent's ``action`` and ``traits`` keys, if
    present, are moved onto the new attack section, which is stored as
    ``parent_section['attack']``.

    Raises:
        AssertionError: if the text does not match either expected layout, if
            the bracketed part is not a single node, or if traits are supplied
            both by the parent and by the attack line.
    """
    # Sample attack lines:
    # tentacle +16 [<a aonid="322" game-obj="Rules"><u>+12/+8</u></a>] (<a aonid="170" game-obj="Traits"><u>agile</u></a>, <a aonid="103" game-obj="Traits"><u>magical</u></a>, <a aonid="192" game-obj="Traits"><u>reach 15 feet</u></a>), <b>Damage</b> 2d8+10 bludgeoning plus slime
    # trident +10 [<a aonid="322" game-obj="Rules"><u>+5/+0</u></a>], <b>Damage</b> 1d8+4 piercing
    # trident +7 [<a aonid="322" game-obj="Rules"><u>+2/-3</u></a>] (<a aonid="195" game-obj="Traits"><u>thrown 20 feet</u></a>), <b>Damage</b> 1d8+3 piercing
    # Sphere of Oblivion +37 [<a aonid="322" game-obj="Rules"><u>+32/+27</u></a>] (<a aonid="103" game-obj="Traits"><u>magical</u></a>), <b>Effect</b> see Sphere of Oblivion
    # piercing hymn +17 [<a aonid="322" game-obj="Rules"><u>+12/+7</u></a>] (<a aonid="83" game-obj="Traits"><u>good</u></a>, <a aonid="103" game-obj="Traits"><u>magical</u></a>, <a aonid="248" game-obj="Traits"><u>range 90 feet</u></a>, <a aonid="147" game-obj="Traits"><u>sonic</u></a>), <b>Damage</b> 4d6 sonic damage plus 1d6 good and deafening aria
    # crossbow +14 [<a aonid="322" game-obj="Rules"><u>+9/+4</u></a>] (<a aonid="248" game-obj="Traits"><u>range increment 120 feet</u></a>, <a aonid=\"254\" game-obj="Traits"><u>reload 1</u></a>), <b>Damage</b> 1d8+2 piercing plus crossbow precision
    text = parent_section.pop('text')
    section = {
        'type': "stat_block_section",
        "subtype": "attack",
        'attack_type': attack_type,
        'name': parent_section['name'],
    }
    for inherited in ('action', 'traits'):
        if inherited in parent_section:
            section[inherited] = parent_section.pop(inherited)

    # Try the layout with a trait list first, then the one without.
    match = (
        re.search(r"^(.*) ([+-]\d*) \[(.*)\] \((.*)\), (.*)$", text)
        or re.search(r"^(.*) ([+-]\d*) \[(.*)\], (.*)$", text))
    assert match, "Failed to parse: %s" % (text)
    fields = list(match.groups())

    section['weapon'] = remove_html_weapon(fields.pop(0), section)

    first_bonus = fields.pop(0)
    penalty_nodes = list(
        BeautifulSoup(fields.pop(0), 'html.parser').children)
    assert len(penalty_nodes) == 1, "Failed to parse: %s" % (text)
    penalty_text, link = extract_link(penalty_nodes[0])
    bonuses = [
        int(bonus) for bonus in [first_bonus] + penalty_text.split("/")]
    section['bonus'] = {
        "type": "stat_block_section",
        "subtype": "attack_bonus",
        "link": link,
        "bonuses": bonuses,
    }

    # The last field starts with the bold "Damage"/"Effect" label; drop it.
    damage_words = fields.pop().split(" ")[1:]
    section['damage'] = parse_attack_damage(" ".join(damage_words).strip())

    # Whatever remains can only be the optional trait list.
    if fields:
        _, traits = extract_starting_traits("(%s)" % (fields.pop()))
        assert 'traits' not in section
        section['traits'] = traits
    assert len(fields) == 0, "Failed to parse: %s" % (text)
    parent_section['attack'] = section
def _extract_source(section): if 'text' in section: bs = BeautifulSoup(section['text'], 'html.parser') children = list(bs.children) if children[0].name == "b" and get_text(children[0]) == "Source": children.pop(0) book = children.pop(0) source = extract_source(book) if children[0].name == "sup": assert 'errata' not in source, "Should be no more than one errata." _, source['errata'] = extract_link( children.pop(0).find("a")) if children[0].name == "br": children.pop(0) assert children[0].name != "a", section section['text'] = ''.join([str(c) for c in children]) return [source]
def parse_attack_damage(text):
    """Parse the damage portion of an attack into a list of damage dicts.

    The text is split on " plus " and " and ". A clause whose first word
    looks like a dice formula or a bare number becomes a damage dict with
    ``formula`` and ``damage_type``, plus ``notes`` (parenthesised text),
    ``links``, ``persistent`` and ``splash`` when applicable. Any other
    clause is handed to ``parse_attack_effect``.

    Raises:
        AssertionError: if a damage type contains more than one "(".
    """
    damages = []
    for clause in split_list(text.strip(), [" plus ", " and "]):
        words = clause.split(" ")
        dice = words.pop(0).strip()
        is_damage = (
            re.match(r"^\d*d\d*.?[0-9]*?$", dice)
            or re.match(r"^\d*$", dice))
        if not is_damage:
            # effect
            words.insert(0, dice)
            damages.append(parse_attack_effect(words))
            continue

        # damage
        damage = {
            "type": "stat_block_section",
            "subtype": "attack_damage",
            "formula": dice.replace('–', '-'),
        }
        damage_type = ' '.join(words)
        if "(" in damage_type:
            pieces = damage_type.split("(")
            damage_type = pieces.pop(0).strip()
            notes = pieces.pop(0).replace(")", "").strip()
            assert len(pieces) == 0, "Failed to parse damage: %s" % (text)
            damage["notes"] = notes
        if "damage" in damage_type:
            # energy touch +36 [<a aonid="322" game-obj="Rules"><u>+32/+28</u></a>] (<a aonid="170" game-obj="Traits"><u>agile</u></a>, <a aonid="99" game-obj="Traits"><u>lawful</u></a>, <a aonid="103" game-obj="Traits"><u>magical</u></a>), <b>Damage</b> 5d8+18 positive or negative damage plus 1d6 lawful
            damage_type = damage_type.replace(" damage", "")

        soup = BeautifulSoup(damage_type, 'html.parser')
        links = [extract_link(anchor)[1] for anchor in soup.find_all("a")]
        if links:
            damage["links"] = links
        damage_type = get_text(soup).strip()

        if damage_type.startswith("persistent"):
            damage_type = damage_type.replace("persistent ", "")
            damage["persistent"] = True
        if "splash" in damage_type:
            damage_type = damage_type.replace("splash", "").strip()
            damage["splash"] = True
        damage["damage_type"] = damage_type
        damages.append(damage)
    return damages
def process_perception(section):
    """Parse the "Perception" stat block line into a perception dict.

    ``section`` is indexed as ``(key, html, link)``: the key must be
    "Perception" and the link must be None. The first part of the line is the
    bonus; a following part that starts with "(" is a modifier on the bonus;
    all remaining parts are special senses.

    Returns:
        A dict with ``type``, ``subtype``, ``name`` and integer ``value``,
        plus ``modifiers`` and ``special_senses`` when present.
    """
    assert section[0] == "Perception"
    assert section[2] is None

    parts = split_stat_block_line(section[1])
    bonus_text = parts.pop(0)
    perception = {
        'type': 'stat_block_section',
        'subtype': 'perception',
        'name': 'perception',
        'value': int(bonus_text.replace("+", "")),
    }

    if parts and parts[0].startswith("("):
        modifier = parts.pop(0).replace("(", "").replace(")", "")
        #TODO: fix []
        perception['modifiers'] = link_modifiers(
            build_objects('stat_block_section', 'modifier', [modifier]))

    if not parts:
        return perception

    special_senses = []
    for part in parts:
        sense_text, modifier = extract_modifier(part)
        nodes = list(BeautifulSoup(sense_text, 'html.parser').children)
        if nodes[0].name == "a":
            # Linked sense: take both the name and the link from the anchor.
            name, link = extract_link(nodes[0])
            sense = build_object(
                'stat_block_section', 'special_sense', name, {'link': link})
        else:
            sense = build_object(
                'stat_block_section', 'special_sense', sense_text)
        if modifier:
            #TODO: fix []
            sense['modifiers'] = link_modifiers(
                build_objects('stat_block_section', 'modifier', [modifier]))
        special_senses.append(sense)
    perception['special_senses'] = special_senses
    return perception
def process_languages(section):
    """Parse the "Languages" stat block line into a languages object.

    ``section`` is indexed as ``(key, html, link)``: the key must be
    "Languages" and the link must be None. Text before the first ";" is the
    language list; anything after it describes communication abilities (for
    example telepathy) or, when a part consists only of a modifier, modifiers
    on the language list as a whole.

    Returns:
        The object built by ``build_object`` with a ``languages`` list, plus
        ``modifiers`` and ``communication_abilities`` when present. A
        language carries a ``link`` only when its text contains an ``<a>``;
        with several anchors the last one's link is used.

    Raises:
        AssertionError: on a wrong key or link, more than three
            ";"-separated groups, or a language entry with no nodes.
    """
    # Sample input, by monster ID:
    # 1, Unseen Servant
    # <b>Languages</b> - (understands its creator)
    # 2, Alghollthu Master
    # <b>Languages</b> <a href="Languages.aspx?ID=13"><u>Aklo</a></u>, <a href="Languages.aspx?ID=24"><u>Alghollthu</a></u>, <a href="Languages.aspx?ID=14"><u>Aquan</a></u>, <a href="Languages.aspx?ID=1"><u>Common</a></u>, <a href="Languages.aspx?ID=11"><u>Undercommon</a></u>
    # 204
    # <b>Languages</b> pidgin of <a style="text-decoration:underline" href="Languages.aspx?ID=6">Goblin</a>, <a style="text-decoration:underline" href="Languages.aspx?ID=8">Jotun</a>, and <a style="text-decoration:underline" href="Languages.aspx?ID=9">Orcish</a>
    # 211
    # <b>Languages</b> <a href="Languages.aspx?ID=1"><u>Common</a></u>; one elemental language (Aquan, Auran, Ignan, or Terran), one planar language (Abyssal, Celestial, or Infernal); telepathy 100 feet
    # 343, Quelaunt
    # <b>Languages</b> <a href="Languages.aspx?ID=13"><u>Aklo</a></u>; (can't speak any language); telepathy 100 feet
    # 639, Drainberry Bush
    # <b>Languages</b> <a href="Languages.aspx?ID=13"><u>Aklo</a></u>, <a href="Languages.aspx?ID=1"><u>Common</a></u>, <a href="Languages.aspx?ID=10"><u>Sylvan</a></u>; <a style="text-decoration:underline" href="Spells.aspx?ID=340"><i>tongues</i></a>
    # 98, Succubus
    # <b>Languages</b> <a href="Languages.aspx?ID=12"><u>Abyssal</a></u>, <a href="Languages.aspx?ID=16"><u>Celestial</a></u>, <a href="Languages.aspx?ID=1"><u>Common</a></u>, <a href="Languages.aspx?ID=2"><u>Draconic</a></u>; three additional mortal languages; telepathy 100 feet, <a style="text-decoration:underline" href="Spells.aspx?ID=340"><i>tongues</i></a>
    assert section[0] == "Languages"
    assert section[2] is None
    text = section[1]
    languages = build_object(
        'stat_block_section', 'languages', 'Languages', {'languages': []})

    if text.find(";") > -1:
        parts = text.split(";")
        text = parts.pop(0)
        assert len(parts) in [1,2], parts
        parts = rebuilt_split_modifiers(
            split_stat_block_line(";".join(parts)))
        abilities = []
        for part in parts:
            newtext, modifier = extract_modifier(part.strip())
            if newtext.strip() == "":
                # A bare parenthetical modifies the language list itself.
                languages['modifiers'] = link_modifiers(
                    build_objects(
                        'stat_block_section', 'modifier',
                        [m.strip() for m in modifier.split(",")]))
            else:
                bs = BeautifulSoup(newtext, 'html.parser')
                ability_link = None
                if bs.a:
                    newtext, ability_link = extract_link(bs.a)
                ability = build_object(
                    'stat_block_section', 'ability', newtext, {
                        'ability_type': 'communication'})
                if ability_link:
                    #TODO: fix []
                    ability['links'] = [ability_link]
                if modifier:
                    #TODO: fix []
                    ability['modifiers'] = link_modifiers(
                        build_objects(
                            'stat_block_section', 'modifier', [
                                modifier.strip()]))
                abilities.append(ability)
        if len(abilities) > 0:
            languages['communication_abilities'] = abilities

    for entry in rebuilt_split_modifiers(split_stat_block_line(text)):
        entry_text, modifier = extract_modifier(entry)
        bs = BeautifulSoup(entry_text, 'html.parser')
        children = list(bs.children)
        if len(children) > 1:
            # Mixed text and links, e.g. "pidgin of <a>Goblin</a>". The link
            # is tracked per entry so that an entry without any anchor never
            # picks up a link left over from an earlier language or
            # communication ability.
            link = None
            has_link = False
            name_pieces = []
            for child in children:
                if child.name == "a":
                    piece, link = extract_link(child)
                    has_link = True
                    name_pieces.append(piece)
                elif isinstance(child, str):
                    name_pieces.append(child)
            language = {
                'name': ''.join(name_pieces),
                'type': 'stat_block_section',
                'subtype': 'language'}
            if has_link:
                language['link'] = link
        else:
            assert len(children) == 1
            if children[0].name == 'a':
                _, link = extract_link(children[0])
                language = {
                    'name': get_text(bs),
                    'type': 'stat_block_section',
                    'subtype': 'language',
                    'link': link}
            else:
                language = {
                    'name': get_text(bs),
                    'type': 'stat_block_section',
                    'subtype': 'language'}
        if modifier:
            #TODO: fix []
            language['modifiers'] = link_modifiers(
                build_objects(
                    'stat_block_section', 'modifier', [modifier]))
        languages['languages'].append(language)
    return languages