else:
    # Download the reference page and keep its parsed <body>; the HTTP
    # response is closed as soon as its payload has been read.
    with urllib.request.urlopen(REFERENCE) as response:
        content = BeautifulSoup(response.read(), features="lxml").body

# Parse the page whose <body> is kept in `meta`: read it from the local mock
# file when MOCK_MAGIC_META is set, otherwise download it.  Both sources are
# closed explicitly once parsed instead of being left to the garbage collector.
if MOCK_MAGIC_META:
    with open(MOCK_MAGIC_META) as mock_file:
        meta = BeautifulSoup(mock_file, features="lxml").body
else:
    with urllib.request.urlopen(PATHFINDER + REFERENCE_META) as response:
        meta = BeautifulSoup(response.read(), features="lxml").body


##
## Collect every metamagic entry found in `meta`
##
metalist = []
tables = meta.find_all('div', {'class': ['BD']})
for t in tables:
    data = extractBD_Type2(t)
    # The property is the second ", "-separated part of the alternate name;
    # it is stored lower-cased under the 'prop' key.
    nom_alt_parts = data['nomAlt'].split(', ')
    data['prop'] = nom_alt_parts[1].lower()
    metalist.append(data)

# Module-level accumulators and the table counter, initialised empty before
# the loop over `tables` starts.
propList = []
liste = []
exists = []
tableIdx = 0

# Every table carrying the 'tablo col1centre' class on the reference page.
tables = content.find_all('table', {'class': ['tablo col1centre']})

print("Extraction des sceptres magiques...")
for t in tables:
                exit(1)

        elif href and not "#" in href:
            # Fetch the item's detail page (or the local mock file when
            # MOCK_MAGIC_ITEM is set) and keep its parsed <body>.
            if MOCK_MAGIC_ITEM:
                # Close the mock file as soon as it has been parsed.
                with open(MOCK_MAGIC_ITEM) as mock_file:
                    page = BeautifulSoup(mock_file, features="lxml").body
            else:
                # Close the HTTP response as soon as it has been read.
                with urllib.request.urlopen(PATHFINDER + href) as response:
                    page = BeautifulSoup(response.read(),
                                         features="lxml").body

            reference = PATHFINDER + href
            # Values extracted from the detail page's "BD" box override the
            # ones already present in `data`.
            data = {
                **data,
                **extractBD_Type2(page.find('div', {'class': ['BD']}))
            }
            descr = data['descr']

            # An empty description is reported and terminates the script.
            if len(descr) == 0:
                print("Description invalide pour: %s" % href)
                exit(1)

        # Assemble the record for this item in a single literal; keys are
        # inserted in the same order as before.
        element = {
            "Nom": data["nomAlt"],  # use the name from the detail page
            "Type": TYPE,
            "Prix": data["prix"],
            "Source": "MJ",
            "Description": data["descr"],
            "Référence": reference,
        }
Exemplo n.º 3
0
        else:
            try:
                page = BeautifulSoup(urllib.request.urlopen(reference).read(),
                                     features="lxml").body

                pageNotFound = page.find('h1', {'class': ['pagetitlesystem']})
                if (pageNotFound and pageNotFound.text() == 'Page Not Found'):
                    continue

            except:
                #print("Page doesn't exist! Skipping...")
                continue

        # Build one `element` record per "BD" box found on the page.
        for boite in page.find_all('div', {'class': ['BD']}):

            data = {**extractBD_Type2(boite)}
            descr = data['descr']

            # An empty description is reported and terminates the script with
            # status 1.
            # NOTE(review): `href` is not assigned in the visible part of this
            # branch (the page is fetched from `reference`) — confirm it holds
            # the right value to report here.
            if len(data['descr']) == 0:
                print("Description invalide pour: %s" % href)
                exit(1)

            element = {}
            element["Nom"] = data[
                "nomAlt"]  # use the name from the detail page
            element["Type"] = TYPE
            element["Prix"] = data[
                "prixAlt"]  # use the price from the detail page
            element["Source"] = "MJ"
            element["Description"] = data["descr"]
            element["Référence"] = reference