Example #1
def scrapeDIYWalls(key):
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    for item in table[0].find_all("tr")[1:]:  # skip the header row
        itemObject = {"name": item.findChildren("td")[0].text.strip("\n")}
        if item.findChildren("a")[1]['href']:
            itemObject["imageLink"] = item.findChildren("a")[1]['href']
        if item.findChildren("td")[2]:
            itemObject["materials"] = separateByBr(
                item.findChildren("td")[2]).strip("\n").split(",")
            itemObject["materialsImageLink"] = getImageLinks(
                item.findChildren("td")[2].find_all("img"))
        if item.findChildren("td")[3].findChildren("a"):
            itemObject["sizeLink"] = item.findChildren("td")[3].findChildren(
                "a")[0]['href']
        if item.findChildren("td")[4].text:
            itemObject["obtainedFrom"] = item.findChildren("td")[4].text.strip(
                '\n')
        if item.findChildren("td")[5].text.strip().replace(",", ""):
            itemObject["price"] = int(
                item.findChildren("td")[5].text.strip().replace(",", ""))
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
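
The DIY scrapers in these examples lean on two helpers that the excerpts omit, separateByBr and getImageLinks, plus the usual requests/BeautifulSoup imports. Judging only from the call sites (a comma-joined materials list and a list of image URLs), a minimal sketch might look like the following; both bodies are assumptions, not the project's actual code:

import requests
from bs4 import BeautifulSoup


def separateByBr(tag, separator=","):
    # Assumed helper: replace every <br> inside a table cell with a
    # separator so "wood<br>stone" comes back as "wood,stone".
    for br in tag.find_all("br"):
        br.replace_with(separator)
    return tag.text


def getImageLinks(imgTags):
    # Assumed helper: collect each image's lazy-loaded URL, falling
    # back to the plain src attribute when data-src is absent.
    return [img.get("data-src") or img.get("src") for img in imgTags]
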
Example #2
def scrapeDIYOthers(key):
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "roundy"})
    items = {}
    for tr in table[0].find_all("tr")[1:]:
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
        }
        items[name] = item
        if tr.findChildren("a")[1]['href']:
            item["imageLink"] = tr.findChildren("a")[1]['href']
        if tr.findChildren("td")[2]:
            item["materials"] = separateByBr(
                tr.findChildren("td")[2]).strip("\n").split(",")
        if tr.findChildren("td")[2].find_all("img"):
            item["materialsImageLink"] = getImageLinks(
                tr.findChildren("td")[2].find_all("img"))
        if tr.findChildren("td")[3].img.get("data-src"):
            item["sizeImageLink"] = tr.findChildren("td")[3].img.get(
                "data-src")
        if tr.findChildren("td")[4].text:
            item["obtainedFrom"] = tr.findChildren("td")[4].text.strip(
                "\n").splitlines()
        if tr.findChildren("td")[5]:
            item["price"] = int(
                tr.findChildren("td")[5].next.strip().replace(",", ""))
    dumpData(items, key)
    return items
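
Every example ends with dumpData(items, key), and the URL lookup goes through a module-level URLS mapping (spelled urls in some revisions) that the excerpts never show. A plausible sketch, with the page URL as a purely hypothetical placeholder:

import json

# Hypothetical key-to-page mapping; the project's real URLs are not shown.
URLS = {
    "diyOthers": "https://example.com/wiki/DIY_recipes",
}


def dumpData(data, key):
    # Assumed helper: persist the scraped structure as <key>.json.
    with open(key + ".json", "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
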
Example #3
def scrapeFish(key):  # same logic as scrapeBugs
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        for td in item.find_all("td"):
            itemInfo.append(td.next.strip())
        itemObject = {
            "name": item.findChildren("a")[0].text,
            "imageLink": item.findChildren("a")[1]['href'],
            "price": int(itemInfo[2]),
            "location": item.findChildren("td")[3].text.strip('\n').strip(),
            "shadowSize": itemInfo[4],  # specific to fish
            "time": item.findChildren("small")[0].text,
            "jan": avaiConverter(itemInfo[6]),
            "feb": avaiConverter(itemInfo[7]),
            "mar": avaiConverter(itemInfo[8]),
            "apr": avaiConverter(itemInfo[9]),
            "may": avaiConverter(itemInfo[10]),
            "jun": avaiConverter(itemInfo[11]),
            "jul": avaiConverter(itemInfo[12]),
            "aug": avaiConverter(itemInfo[13]),
            "sep": avaiConverter(itemInfo[14]),
            "oct": avaiConverter(itemInfo[15]),
            "nov": avaiConverter(itemInfo[16]),
            "dec": avaiConverter(itemInfo[17])
        }
        itemList.append(itemObject)
    dumpData(itemList, key)

    return itemList
Example #4
def scrapeBugs(key):  # take url and return object containing bugs data
    url = URLS.get(key)
    # create soup object
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    # find the target table
    table = soup.find_all("table", {"class": "sortable"})

    items = {}
    # go through each tr in the table, ignoring the table header
    for tr in table[0].find_all("tr")[1:]:
        tableData = []
        # get rid of empty space
        for td in tr.find_all("td"):
            tableData.append(td.next.strip())
        # find data and save it into an object
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": int(tableData[2]),
            "location": tr.findChildren("td")[3].text.strip('\n').strip(),
            "time": tr.findChildren("small")[0].text,
            "seasons-northern-hemisphere": {
                "jan": avaiConverter(tableData[5]),
                "feb": avaiConverter(tableData[6]),
                "mar": avaiConverter(tableData[7]),
                "apr": avaiConverter(tableData[8]),
                "may": avaiConverter(tableData[9]),
                "jun": avaiConverter(tableData[10]),
                "jul": avaiConverter(tableData[11]),
                "aug": avaiConverter(tableData[12]),
                "sep": avaiConverter(tableData[13]),
                "oct": avaiConverter(tableData[14]),
                "nov": avaiConverter(tableData[15]),
                "dec": avaiConverter(tableData[16])
            },
            "seasons-southern-hemisphere":
            {  # shift northern hemisphere by 6 months
                "jan": avaiConverter(tableData[11]),
                "feb": avaiConverter(tableData[12]),
                "mar": avaiConverter(tableData[13]),
                "apr": avaiConverter(tableData[14]),
                "may": avaiConverter(tableData[15]),
                "jun": avaiConverter(tableData[16]),
                "jul": avaiConverter(tableData[5]),
                "aug": avaiConverter(tableData[6]),
                "sep": avaiConverter(tableData[7]),
                "oct": avaiConverter(tableData[8]),
                "nov": avaiConverter(tableData[9]),
                "dec": avaiConverter(tableData[10])
            }
        }
        items[name] = item
    dumpData(items, key)
    # return for debugging
    return items
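
avaiConverter appears in every bugs/fish scraper but is never shown. The wiki's availability columns mark an active month with a checkmark and leave the rest blank or dashed, so a reasonable sketch is the one below; both the symbol and the boolean output are assumptions:

def avaiConverter(cellText):
    # Assumed helper: map the wiki's availability cell to a boolean;
    # "✓" means available, anything else (empty, "-") means not.
    return cellText.strip() == "✓"
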
Example #5
    def onData(self, interest, data):
        """
        FileSync:
            To be written (TBW)

        """
        # TODO: Verify packet
        self.keyChain.verifyData(data, self.onVerified, self.onVerifyFailed)

        util.dump("Got data packet with name", data.getName().toUri())
        util.dumpData(data)

        content = fileSyncBuf_pb2.FileSync()
        content.ParseFromString(data.getContent().toRawStr())
        print("Type: " + str(content.dataType) + ", data: " + content.data)

        if self.getNowMilliseconds() - content.timestamp * 1000.0 < 120000.0:
            # Use getattr because "from" is a reserved keyword.
            name = getattr(content, "from")
            prefix = data.getName().getPrefix(-2).toUri()
            sessionNo = int(data.getName().get(-2).toEscapedString())
            sequenceNo = int(data.getName().get(-1).toEscapedString())
            nameAndSession = name + str(sessionNo)

            i = 0
            # Update roster.
            while i < len(self.roster):
                entry = self.roster[i]
                tempName = entry[0:len(entry) - 10]
                tempSessionNo = int(entry[len(entry) - 10:])
                if (name != tempName and content.dataType !=
                        fileSyncBuf_pb2.FileSync.UNSUBSCRIBE):
                    i += 1
                else:
                    if name == tempName and sessionNo > tempSessionNo:
                        self.roster[i] = nameAndSession
                    break

            if i == len(self.roster):
                self.roster.append(nameAndSession)
                print(name + ": Subscribe")

            # Use getattr because "from" is a reserved keyword.
            if (content.dataType == fileSyncBuf_pb2.FileSync.UPDATE
                    and not self.isRecoverySyncState
                    and getattr(content, "from") != self.screenName):
                self.onRecievedFileUpdate(content)
            elif content.dataType == fileSyncBuf_pb2.FileSync.UNSUBSCRIBE:
                # leave message
                try:
                    n = self.roster.index(nameAndSession)
                    if name != self.screenName:
                        self.roster.pop(n)
                        print(name + ": Unsubscribe")
                except ValueError:
                    pass
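
The roster bookkeeping above encodes each participant as the screen name immediately followed by the session number, and the slicing assumes that number is always exactly 10 characters wide (a Unix-time-in-seconds session number fits this until the year 2286). A worked example under that assumption, with hypothetical values:

name, sessionNo = "alice", 1588203966
entry = name + str(sessionNo)   # "alice1588203966"
entry[0:len(entry) - 10]        # -> "alice"
int(entry[len(entry) - 10:])    # -> 1588203966
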
Example #6
def scrapeFish(key):  # same logic as scrapeBugs
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    for tr in table[0].find_all("tr")[1:]:
        tableData = []
        for td in tr.find_all("td"):
            tableData.append(td.next.strip())
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": int(tableData[2]),
            "location": tr.findChildren("td")[3].text.strip('\n').strip(),
            "shadowSize": tableData[4],  # specific to fish
            "time": tr.findChildren("small")[0].text,
            "seasons-northern-hemisphere": {
                "jan": avaiConverter(tableData[6]),
                "feb": avaiConverter(tableData[7]),
                "mar": avaiConverter(tableData[8]),
                "apr": avaiConverter(tableData[9]),
                "may": avaiConverter(tableData[10]),
                "jun": avaiConverter(tableData[11]),
                "jul": avaiConverter(tableData[12]),
                "aug": avaiConverter(tableData[13]),
                "sep": avaiConverter(tableData[14]),
                "oct": avaiConverter(tableData[15]),
                "nov": avaiConverter(tableData[16]),
                "dec": avaiConverter(tableData[17])
            },
            "seasons-southern-hemisphere": {
                "jan": avaiConverter(tableData[12]),
                "feb": avaiConverter(tableData[13]),
                "mar": avaiConverter(tableData[14]),
                "apr": avaiConverter(tableData[15]),
                "may": avaiConverter(tableData[16]),
                "jun": avaiConverter(tableData[17]),
                "jul": avaiConverter(tableData[6]),
                "aug": avaiConverter(tableData[7]),
                "sep": avaiConverter(tableData[8]),
                "oct": avaiConverter(tableData[9]),
                "nov": avaiConverter(tableData[10]),
                "dec": avaiConverter(tableData[11])
            }
        }
        items[name] = item
    dumpData(items, key)
    return items
Example #7
def scrapeDIYTools(key):
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        for td in item.find_all("td"):
            if td.string is not None:
                itemInfo.append(td.next.strip())
            else:
                itemInfo.append(td.next)
        itemObject = {
            "name": item.findChildren("td")[0].a.text,
        }
        try:
            itemObject["imageLink"] = item.findChildren("a")[1]['href']
        except AttributeError:
            itemObject["imageLink"] = None
        try:
            itemObject["materials"] = separateByBr(
                item.findChildren("td")[2]).strip("\n").split(",")
        except AttributeError:
            itemObject["materials"] = []
        try:
            itemObject["materialsImageLink"] = getImageLinks(
                item.findChildren("td")[2].find_all("img"))
        except AttributeError:
            itemObject["materialsImageLink"] = []
        try:
            itemObject["sizeLink"] = itemInfo[3].img.get("data-src")
        except AttributeError:
            itemObject["sizeLink"] = None
        try:
            itemObject["obtainedFrom"] = itemInfo[4].text
        except AttributeError:
            itemObject["obtainedFrom"] = None
        try:
            itemObject["price"] = int(itemInfo[5].strip().replace(",", ""))
        except (AttributeError, IndexError, ValueError):
            itemObject["price"] = None
        try:
            itemObject["isRecipeItem"] = avaiConverter(itemInfo[6])
        except (AttributeError, IndexError, ValueError):
            itemObject["isRecipeItem"] = None
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
Example #8
def scrapeVillagers(key):
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    for item in table[0].find_all("tr")[1:]:
        itemObject = {
            "name": item.findChildren("td")[0].a.text,
            "imageLink": item.findChildren("a")[1]['href'],
            "personality": item.findChildren("td")[2].text.strip("\n")[1:],
            "species": item.findChildren("td")[3].text.strip("\n")[1:],
            "birthday": item.findChildren("td")[4].text.strip("\n")[1:],
            "catchPhrase": item.findChildren("td")[5].text.strip("\n")[1:]
        }
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
Example #9
def scrapeVillagers(key):
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    for tr in table[0].find_all("tr")[1:]:
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("td")[1].a['href'],
            "personality": tr.findChildren("td")[2].text.strip("\n")[1:],
            "species": tr.findChildren("td")[3].text.strip("\n")[1:],
            "birthday": tr.findChildren("td")[4].text.strip("\n")[1:],
            "catchPhrase": tr.findChildren("td")[5].text.strip("\n")[1:]
        }
        items[name] = item
    dumpData(items, key)
    return items
Example #10
def scrapeFossils(key):  # same logic as scrapeBugs and scrapeFish
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    # Stand-alone fossils
    for tr in table[0].find_all("tr")[1:]:
        tableData = []
        for td in tr.find_all("td"):
            tableData.append(td.next.strip())
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": getPriceWithBellsString(tableData[2]),
            "isMultipart": False
        }
        items[name] = item

    # Multi-part fossils
    for tr in table[1].find_all("tr")[1:]:
        tableData = []
        tds = tr.find_all("td")
        if not tds:
            currentCategory = tr.findChildren("a")[0].text
            continue
        for td in tr.find_all("td"):
            tableData.append(td.next.strip())
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": getPriceWithBellsString(tableData[2]),
            "isMultipart": True,
            "category": currentCategory
        }
        items[name] = item
    dumpData(items, key)
    return items
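
getPriceWithBellsString is another unshown helper; the fossil tables print prices as strings such as "1,000 Bells" rather than bare numbers, which is presumably why the plain int() used elsewhere does not apply. A sketch under that assumption:

def getPriceWithBellsString(priceString):
    # Assumed helper: turn a cell like "1,000 Bells" into the int 1000.
    return int(priceString.replace(",", "").replace("Bells", "").strip())
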
Example #11
def scrapeBugs(key):  # take url and return object containing bugs data
    url = urls.get(key)
    # create soup object
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    # find the target table
    table = soup.find_all("table", {"class": "sortable"})
    # contains all items
    itemList = []
    # ignore first row as it just contains labels to the data
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        # get rid of empty space
        for td in item.find_all("td"):
            itemInfo.append(td.next.strip())

        # find data and save it into an object
        itemObject = {
            "name": item.findChildren("td")[0].a.text,
            "imageLink": item.findChildren("a")[1]['href'],
            "price": int(itemInfo[2]),
            "location": item.findChildren("td")[3].text.strip('\n').strip(),
            "time": item.findChildren("small")[0].text,
            "jan": avaiConverter(itemInfo[5]),
            "feb": avaiConverter(itemInfo[6]),
            "mar": avaiConverter(itemInfo[7]),
            "apr": avaiConverter(itemInfo[8]),
            "may": avaiConverter(itemInfo[9]),
            "jun": avaiConverter(itemInfo[10]),
            "jul": avaiConverter(itemInfo[11]),
            "aug": avaiConverter(itemInfo[12]),
            "sep": avaiConverter(itemInfo[13]),
            "oct": avaiConverter(itemInfo[14]),
            "nov": avaiConverter(itemInfo[15]),
            "dec": avaiConverter(itemInfo[16])
        }
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
Example #12
def scrapeFossils(key):  # same logic as scrapeBugs and scrapeFish
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []

    # Stand-alone fossils
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        for td in item.find_all("td"):
            itemInfo.append(td.next.strip())
        itemObject = {
            "name": item.findChildren("a")[0].text,
            "imageLink": item.findChildren("a")[1]['href'],
            "price": getPriceWithBellsString(itemInfo[2]),
            "isMultipart": False
        }
        itemList.append(itemObject)

    # Multi-part fossils
    for item in table[1].find_all("tr")[1:]:
        itemInfo = []
        items = item.find_all("td")
        if not items:
            category = item.findChildren("a")[0].text
            continue
        for td in item.find_all("td"):
            itemInfo.append(td.next.strip())
        itemObject = {
            "name": item.findChildren("a")[0].text,
            "imageLink": item.findChildren("a")[1]['href'],
            "price": getPriceWithBellsString(itemInfo[2]),
            "isMultipart": True,
            "category": category
        }
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
Example #13
def onData(interest, data):
    util.dump("Data received: ", interest.getName().toUri())
    util.dumpData(data)
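
This onData is the consumer-side half of a PyNDN exchange; it only fires once the interest it was attached to is satisfied. A minimal usage sketch follows, with the name prefix purely illustrative:

import time

from pyndn import Face, Name


def onTimeout(interest):
    print("Timeout for interest " + interest.getName().toUri())


face = Face()
# "/example/data" is a hypothetical prefix; use whatever your producer serves.
face.expressInterest(Name("/example/data"), onData, onTimeout)
while True:
    face.processEvents()
    time.sleep(0.01)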