l['Source'] = source found = True #if not found: # print("- une description existe pour '" + name + "' mais pas le sommaire!"); for URLDET in URLDETS: print(URLDET) if MOCK_WD: content = BeautifulSoup(open(MOCK_WD),features="lxml").body else: content = BeautifulSoup(urllib.request.urlopen(URLDET).read(),features="lxml").body section = jumpTo(content, 'h1',{'class':'separator'}, u"À mains nues") if not section: section = jumpTo(content, 'h1',{'class':'separator'}, u"Description individuelle des armes orientales") newObj = True name = "" descr = "" source = None sourceNext = None for s in section: if s.name == 'div': for e in s.children: if e.name == 'h2' or e.name == 'b': if not newObj: addInfos(liste, name, sourceNext)
l['Description'] = descr.strip() if not source is None: l['Source'] = source found = True if not found: print("- une description existe pour '" + name + "' mais pas le sommaire!") if MOCK_WD: content = BeautifulSoup(open(MOCK_WD), features="lxml").body else: content = BeautifulSoup(urllib.request.urlopen(URLDET).read(), features="lxml").body section = jumpTo(content, 'h1', {'class': 'separator'}, "Armures classiques") newObj = True name = "" descr = "" source = None sourceNext = None for s in section: if s.name == 'div': for e in s.children: if e.name == 'h2' or e.name == 'b': if not newObj: addInfos(liste, name, sourceNext) sourceNext = source if e.name == 'h2':
'Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto', 'Description', 'Référence' ] MATCH = ['Nom', 'Classe', 'Archétype'] liste = [] print("Extraction des aptitude (astuces)...") if MOCK_ASTUCE: content = BeautifulSoup(open(MOCK_ASTUCE), features="lxml").body else: content = BeautifulSoup(urllib.request.urlopen(URL).read(), features="lxml").body section = jumpTo(content, 'h2', {'class': 'separator'}, "Description des astuces de ninja") LVL = 2 astuce = {'Niveau': LVL} newObj = False descr = "" source = 'AG' for s in section: if s.name == 'h2' and "Description des astuces de maître" in s.text: LVL = 10 elif s.name == "table": for td in s.find_all('td'): for el in td.children: if el.name == "h3": nom = cleanSectionName(el.text) reference = URL + el.find_next("a")['href']
# add infos to existing weapong in list found = False for l in liste: for n in names: if l['Nom'].lower() == n.lower() or l['Nom'].lower( ).startswith(n.lower()): l['Complete'] = True l['Description'] = descr.strip() if not source is None: l['Source'] = source found = True if not found: print("- une description existe pour '" + name + "' mais pas le sommaire!") section = jumpTo(content, 'h2', {'class': 'separator'}, u"Descriptions") if not section: section = jumpTo(content, 'h2', {'class': 'separator'}, data["category"]) if not section: print("No descriptions found for %s" % data["category"]) exit(1) newObj = True name = "" descr = "" source = None sourceNext = None for e in section: if e.name == 'h3': if not newObj:
# Column names used for the extracted entries, and the subset named MATCH
# (how both are consumed is outside this excerpt).
FIELDS = ['Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto', 'Description', 'Référence' ]
MATCH = ['Nom', 'Classe', 'Archétype']

liste = []

print("Extraction des aptitude (exploitations)...")

source = 'MCA'

# Parse either the local mock page or the live page. Both are opened through
# a context manager so the file handle / HTTP response is always closed
# (the previous code leaked them).
if MOCK_EXPLOITATION:
    with open(MOCK_EXPLOITATION) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URL) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

# NOTE(review): jumpTo is defined elsewhere; presumably it yields the nodes
# that follow the matching heading -- confirm against its definition.
section = jumpTo(content, 'h2', {'class': 'separator'}, u"Exploitations d'arcaniste")

level = 0
for s in section:
    if s.name == 'div' and s.has_attr('class') and "article_2col" in s['class']:
        # The first two-column div is tagged level 2, every later one level 11.
        level = 2 if level == 0 else 11

        exploitation = {'Source': source, 'Niveau': level}
        newObj = False
        brCount = 0
        descr = ""
        for e in s.children:
            if e.name == 'h3':
                if newObj:
                    exploitation['Classe'] = 'Arcaniste'
#MOCK_MALEFICE = "mocks/malefices.html" # décommenter pour tester avec les maléfices pré-téléchargées URL = "http://www.pathfinder-fr.org/Wiki/Pathfinder-RPG.mal%c3%a9fices.ashx" FIELDS = ['Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto', 'Description', 'Référence' ] MATCH = ['Nom', 'Classe', 'Archétype'] liste = [] print("Extraction des aptitude (maléfices)...") if MOCK_MALEFICE: content = BeautifulSoup(open(MOCK_MALEFICE),features="lxml").body else: content = BeautifulSoup(urllib.request.urlopen(URL).read(),features="lxml").body section = jumpTo(content, 'h2',{'class':'separator'}, u"Maléfices") LVL = 1 malefice = {'Niveau':LVL} newObj = False descr = "" source = 'MJRA' for s in section: if s.name == 'h2' and "Maléfices majeurs" in s.text: LVL = 10 elif s.name == 'h2' and "Grands maléfices" in s.text: LVL = 18 elif s.name == "table": for td in s.find_all('td'): for el in td.children: if el.name == "h3":
if el.name == "a" or el.name == "div": src = extractSource(el) if src: source = src benediction = {} benediction['Nom'] = name benediction['Classe'] = 'Oracle' benediction['Niveau'] = 1 benediction['Auto'] = False benediction['Description'] = cleanDescription(description) benediction['Source'] = source benediction['Référence'] = u liste.append(benediction) section = jumpTo(content, 'h2', {'class':'separator'}, 'Révélations') if not section: print('Aucune section "Révélations" trouvée!'); exit(1) descr = "" for el in section: if el.name == "b": benedictionName = cleanLabel(el.text) benediction = {} benediction['Nom'] = name + ": " + benedictionName benediction['Classe'] = 'Oracle' benediction['Niveau'] = 1 benediction['Auto'] = False
'Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto', 'Description', 'Référence' ] MATCH = ['Nom', 'Classe', 'Archétype'] liste = [] print("Extraction des aptitude (talents)...") if MOCK_TALENT: content = BeautifulSoup(open(MOCK_TALENT), features="lxml").body else: content = BeautifulSoup(urllib.request.urlopen(URL).read(), features="lxml").body section = jumpTo(content, 'h2', {'class': 'separator'}, u"Description des talents de roublard") level = 0 for s in section: if s.name == 'div' and s.has_attr( 'class') and "article_2col" in s['class']: level = 2 if level == 0 else 10 talent = {'Source': 'MJ', 'Niveau': level} newObj = False brCount = 0 descr = "" for e in s.children: if e.name == 'h3': if newObj:
exit(1) return "MJ" liste = [] listePouvoirs = [] print("Extraction des aptitude (lignages)...") if MOCK_LIGNAGE: content = BeautifulSoup(open(MOCK_LIGNAGE), features="lxml").body else: content = BeautifulSoup(urllib.request.urlopen(URL).read(), features="lxml").body section = jumpTo(content, 'h2', {'class': 'separator'}, "Les lignages d'ensorceleurs¶") for s in section: if s.name == "div" and "navmenu" in s.attrs['class']: lignages = s.find_all("li") for l in lignages: link = l.find("a") if link is None: continue lignage = {} lignage['Nom'] = "Lignage: " + link.text lignage['Classe'] = "Ensorceleur" lignage['Source'] = extractSource(l.text) lignage['Niveau'] = 1 lignage['Description'] = ""
link = data['Référence'] if data['Nom'] == "Duergar" or data['Nom'] == "Suli": print("Ignore race %s saisie manuellement" % data['Nom']) continue print("Extraction des traits alternatifs de %s" % data['Nom']) pageURL = link if MOCK_RACE: content = BeautifulSoup(open(MOCK_RACE),features="lxml").body else: content = BeautifulSoup(urllib.request.urlopen(pageURL).read(),features="lxml").body # traits alternatifs section = jumpTo(content, 'h2',{'class':'separator'}, u"Traits raciaux alternatifs") for s in section: if s.name == 'h2': if not found: print("Aucun trait racial alternatif trouvé!") break; # avoid jumping to other sections if s.name == 'div' and 'class' in s.attrs and "row" in s.attrs['class']: for attr in s.find_all('li'): descr = "" remplaceText = "" modifieText = "" for el in attr.children: if el.name == 'b': name = el.text.strip() if name.endswith('.'):
'Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto', 'Description', 'Référence' ] MATCH = ['Nom', 'Classe', 'Archétype'] liste = [] print("Extraction des aptitude (ordres)...") if MOCK_ORDRE: content = BeautifulSoup(open(MOCK_ORDRE), features="lxml").body else: content = BeautifulSoup(urllib.request.urlopen(URL).read(), features="lxml").body section = jumpTo(content, 'h2', {'class': 'separator'}, u"Ordres de chevalier") ordre = {'Niveau': 1} newObj = False descr = "" source = 'MJRA' for el in section: if el.name == "h2": break if el.name == "h3": nom = cleanSectionName(el.text) reference = URL + el.find_next("a")['href'] if newObj: ordre['Classe'] = 'Chevalier'
# Wiki page scraped by this script (its "Liste des états préjudiciables" section).
URL = "http://www.pathfinder-fr.org/Wiki/Pathfinder-RPG.%c3%89tats%20pr%c3%a9judiciables.ashx"
MOCK_CF = None
#MOCK_CF = "mocks/conditions.html"  # uncomment to test against the pre-downloaded conditions page

FIELDS = ['Nom', 'Source', 'Description', 'Référence']
MATCH = ['Nom']

liste = []

# Parse either the local mock page or the live page. Both are opened through
# a context manager so the file handle / HTTP response is always closed
# (the previous code leaked them).
if MOCK_CF:
    with open(MOCK_CF) as mock_file:
        content = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(URL) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

# NOTE(review): jumpTo is defined elsewhere; presumably it yields the nodes
# that follow the matching heading -- confirm against its definition.
section = jumpTo(content, 'h2', {'class': 'separator'}, "Liste des états préjudiciables")

SOURCE = "MJ"
condition = {'Source': SOURCE}
newObj = False
advantage = False
descr = ""

for s in section:
    if s.name == 'h2':
        # A new h2 heading: store the description accumulated so far and
        # record the current entry before switching source.
        condition['Description'] = descr.strip()
        liste.append(condition)

        # advantages
        SOURCE = "AM"
domain['Classe'] = "Prêtre" domain['Source'] = "MJ" domain['Niveau'] = 1 domain['Description'] = "" domain['Référence'] = "http://www.pathfinder-fr.org/Wiki/" + link["href"] print("Traitement: " + link["href"]) if MOCK_DOMAINE: domainHTML = BeautifulSoup(open(MOCK_DOMAINE_PAGE), features="lxml").body else: domainHTML = BeautifulSoup(urllib.request.urlopen( domain['Référence']).read(), features="lxml").body pouvoirs = jumpTo(domainHTML, 'h2', {'class': 'separator'}, "Pouvoirs accordés") if pouvoirs is None: pouvoirs = jumpTo(domainHTML, 'b', {}, "Pouvoirs accordés") if pouvoirs is None: print("NOT FOUND!!") continue for p in pouvoirs: if (p.name == 'h2'): break else: domain['Description'] += html2text(p) liste.append(domain) #exit(1)
'Nom', 'Classe', 'Archétype', 'Prérequis', 'Source', 'Niveau', 'Auto', 'Description', 'Référence' ] MATCH = ['Nom', 'Classe', 'Archétype'] liste = [] print("Extraction des aptitude (rages)...") if MOCK_RAGE: content = BeautifulSoup(open(MOCK_RAGE), features="lxml").body else: content = BeautifulSoup(urllib.request.urlopen(URL).read(), features="lxml").body section = jumpTo(content, 'h2', {'class': 'separator'}, u"Description des pouvoirs de rage") source = None sourceNext = None for s in section: if s.name == 'div': rage = {'Source': 'MJ', 'Niveau': 1} newObj = False brCount = 0 descr = "" for e in s.children: if e.name == 'h3': if newObj: rage['Classe'] = 'Barbare' rage['Description'] = descr.strip() if not sourceNext is None:
# titre name = content.find_next('h1', {'class': 'pagetitle'}).string.strip() if name.startswith('Les '): name = name[4:-1].title() race['Nom'] = data['name'] # source race['Source'] = data['source'] # référence race['Référence'] = link # traits race['Traits'] = [] section = jumpTo(content, 'h2', {'class': 'separator'}, "Traits raciaux standards") for s in section: if s.name == 'div' and 'class' in s.attrs and "arrondi" in s.attrs[ 'class']: first = True for attr in s.find_all('li'): trait = {} descr = "" for el in attr.children: if el.name == 'b': name = el.text.strip() if first: trait['Nom'] = "Caractéristiques" descr = name else: if name.endswith('.'):
if not ptsComp: print("Points de compétence non-trouvé pour classe: %s" % name) exit(1) m = re.search('(\d) \\+ modificateur d[\'’]Intelligence', ptsComp) if not m: print("Points de compétence n'a pas pu être extrait!") exit(1) cl['RangsParNiveau'] = int(m.group(1)) # compétences de classe cl['CompétencesDeClasse'] = [] sectionNames = ["Compétences de classe", "Compétences de la classe"] section = None for s in sectionNames: section = jumpTo(content, 'h2', {'class': 'separator'}, s) if section: break if not section: print("- Compétences de la classe %s n'a pas être trouvée!!!" % cl['Nom']) continue for s in section: if s.name == 'a' and (len(s.text) > 3 or s.text.lower() == "vol"): value = s.text if value == u"Connaissances": idx = s.next_sibling.index(')') if idx > 0: value += s.next_sibling[0:idx + 1].lower()