Пример #1
0
def scrapeDIYWalls(key):
    """Scrape item rows from the first ``sortable`` table of a page.

    The page URL is looked up in ``urls`` under ``key``. Every table row
    after the first one that contains at least one ``<td>`` becomes a dict.
    ``name`` is always present; the remaining keys (``imageLink``,
    ``materials``, ``materialsImageLink``, ``sizeLink``, ``obtainedFrom``,
    ``price``) are only added when the row actually provides the data, so a
    short or partially empty row no longer aborts the whole scrape.

    Args:
        key: Lookup key into ``urls``; also forwarded to ``dumpData``.

    Returns:
        The list of item dicts, in table order. The same list is passed to
        ``dumpData(itemList, key)`` before returning.
    """
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    tables = soup.find_all("table", {"class": "sortable"})
    itemList = []
    # Skip the first row (presumably the header row).
    for row in tables[0].find_all("tr")[1:]:
        # Look the cells and anchors up once instead of re-querying the row
        # for every field.
        cells = row.find_all("td")
        if not cells:
            # No data cells in this row, so there is nothing to extract.
            continue
        anchors = row.find_all("a")

        itemObject = {"name": cells[0].text.strip("\n")}

        # The second anchor of the row carries the image link.
        if len(anchors) > 1 and anchors[1].get("href"):
            itemObject["imageLink"] = anchors[1]["href"]

        # A Tag is always truthy, so the only meaningful check here is
        # whether the cell exists at all.
        if len(cells) > 2:
            materialsCell = cells[2]
            itemObject["materials"] = separateByBr(
                materialsCell).strip("\n").split(",")
            itemObject["materialsImageLink"] = getImageLinks(
                materialsCell.find_all("img"))

        if len(cells) > 3:
            sizeAnchor = cells[3].find("a")
            if sizeAnchor is not None and sizeAnchor.has_attr("href"):
                itemObject["sizeLink"] = sizeAnchor["href"]

        if len(cells) > 4 and cells[4].text:
            itemObject["obtainedFrom"] = cells[4].text.strip("\n")

        if len(cells) > 5:
            priceText = cells[5].text.strip().replace(",", "")
            if priceText:
                try:
                    itemObject["price"] = int(priceText)
                except ValueError:
                    # Non-numeric price text: leave the key out, like every
                    # other field that has no usable value.
                    pass

        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
Пример #2
0
def scrapeDIYOthers(key):
    """Scrape item rows from the first ``roundy`` table of a page.

    The page URL is looked up in ``URLS`` under ``key``. Every table row
    after the first one that contains at least one ``<td>`` becomes a dict
    stored under the item's name. ``name`` is always present; the remaining
    keys (``imageLink``, ``materials``, ``materialsImageLink``,
    ``sizeImageLink``, ``obtainedFrom``, ``price``) are only added when the
    row actually provides the data, so a missing cell, image or number no
    longer aborts the whole scrape.

    Args:
        key: Lookup key into ``URLS``; also forwarded to ``dumpData``.

    Returns:
        A dict mapping item name to its item dict. The same dict is passed
        to ``dumpData(items, key)`` before returning.
    """
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    tables = soup.find_all("table", {"class": "roundy"})
    items = {}
    # Skip the first row (presumably the header row).
    for row in tables[0].find_all("tr")[1:]:
        # Look the cells and anchors up once instead of re-querying the row
        # for every field.
        cells = row.find_all("td")
        if not cells:
            # No data cells in this row, so there is nothing to extract.
            continue
        anchors = row.find_all("a")

        name = cells[0].a.text
        item = {
            "name": name,
        }
        items[name] = item

        # The second anchor of the row carries the image link.
        if len(anchors) > 1 and anchors[1].get("href"):
            item["imageLink"] = anchors[1]["href"]

        # A Tag is always truthy, so the only meaningful check here is
        # whether the cell exists at all.
        if len(cells) > 2:
            materialsCell = cells[2]
            item["materials"] = separateByBr(
                materialsCell).strip("\n").split(",")
            materialImages = materialsCell.find_all("img")
            if materialImages:
                item["materialsImageLink"] = getImageLinks(materialImages)

        if len(cells) > 3:
            # ``.img`` is None when the cell has no image; check before
            # reading the attribute.
            sizeImage = cells[3].img
            if sizeImage is not None and sizeImage.get("data-src"):
                item["sizeImageLink"] = sizeImage.get("data-src")

        if len(cells) > 4 and cells[4].text:
            item["obtainedFrom"] = cells[4].text.strip("\n").splitlines()

        if len(cells) > 5:
            # Use the first non-blank string inside the cell. ``.next`` would
            # step outside an empty cell, and has no ``strip`` when the first
            # child is a tag.
            priceText = next(cells[5].stripped_strings, "").replace(",", "")
            try:
                item["price"] = int(priceText)
            except ValueError:
                # Empty or non-numeric price text: leave the key out.
                pass
    dumpData(items, key)
    return items
Пример #3
0
def scrapeDIYTools(key):
    """Scrape item rows from the first ``sortable`` table of a page.

    The page URL is looked up in ``urls`` under ``key``. Every table row
    after the first one that contains at least one ``<td>`` becomes a dict
    that always has the same keys; a value that cannot be extracted falls
    back to a default instead of raising:

    * ``name`` - text of the first anchor in the first cell.
    * ``imageLink`` - ``href`` of the row's second anchor, or ``None``.
    * ``materials`` - ``separateByBr`` output of the third cell split on
      commas, or ``[]``.
    * ``materialsImageLink`` - ``getImageLinks`` over the third cell's
      images, or ``[]``.
    * ``sizeLink`` - ``data-src`` of the first image in the fourth cell, or
      ``None``.
    * ``obtainedFrom`` - stripped text of the fifth cell, or ``None`` when
      the cell is missing or blank.
    * ``price`` - first non-blank string of the sixth cell parsed as an int
      (commas removed), or ``None``.
    * ``isRecipeItem`` - ``avaiConverter`` applied to the seventh cell's
      leading value, or ``None``.

    Args:
        key: Lookup key into ``urls``; also forwarded to ``dumpData``.

    Returns:
        The list of item dicts, in table order. The same list is passed to
        ``dumpData(itemList, key)`` before returning.
    """
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    tables = soup.find_all("table", {"class": "sortable"})
    itemList = []
    # Skip the first row (presumably the header row).
    for row in tables[0].find_all("tr")[1:]:
        cells = row.find_all("td")
        if not cells:
            # No data cells in this row, so there is nothing to extract.
            continue

        itemObject = {
            "name": cells[0].a.text,
        }

        # Indexing the anchor list raises IndexError and a missing attribute
        # raises KeyError; neither is an AttributeError.
        try:
            itemObject["imageLink"] = row.find_all("a")[1]["href"]
        except (IndexError, KeyError):
            itemObject["imageLink"] = None

        materialsCell = cells[2] if len(cells) > 2 else None
        if materialsCell is None:
            itemObject["materials"] = []
            itemObject["materialsImageLink"] = []
        else:
            try:
                itemObject["materials"] = separateByBr(
                    materialsCell).strip("\n").split(",")
            except AttributeError:
                itemObject["materials"] = []
            try:
                itemObject["materialsImageLink"] = getImageLinks(
                    materialsCell.find_all("img"))
            except AttributeError:
                itemObject["materialsImageLink"] = []

        # Search the whole cell for the image, so an <img> that is a direct
        # child or follows leading whitespace is found too.
        sizeImage = cells[3].img if len(cells) > 3 else None
        if sizeImage is not None:
            itemObject["sizeLink"] = sizeImage.get("data-src")
        else:
            itemObject["sizeLink"] = None

        # Read the cell's own text. A plain ``str`` has no ``.text``, which
        # used to turn every text-only cell into None.
        sourceText = cells[4].text.strip() if len(cells) > 4 else ""
        itemObject["obtainedFrom"] = sourceText or None

        # The first non-blank string inside the cell holds the number; any
        # trailing markup is ignored.
        if len(cells) > 5:
            priceText = next(cells[5].stripped_strings, "")
        else:
            priceText = ""
        try:
            itemObject["price"] = int(priceText.replace(",", ""))
        except ValueError:
            itemObject["price"] = None

        if len(cells) > 6:
            flagCell = cells[6]
            # avaiConverter is defined elsewhere, so it keeps receiving the
            # same kind of value as before: stripped text for a text-only
            # cell, otherwise the element that follows the cell's opening
            # tag. Using ``.string`` instead of ``.next`` for the text case
            # avoids calling ``strip`` on a nested tag.
            # NOTE(review): for an empty cell ``.next`` is an element outside
            # the cell - confirm what avaiConverter should get in that case.
            if flagCell.string is not None:
                flagValue = flagCell.string.strip()
            else:
                flagValue = flagCell.next
            try:
                itemObject["isRecipeItem"] = avaiConverter(flagValue)
            except Exception:
                # Best effort: the converter's failure modes are not visible
                # here, so any ordinary error yields None.
                itemObject["isRecipeItem"] = None
        else:
            itemObject["isRecipeItem"] = None

        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList