import requests
from bs4 import BeautifulSoup


def scrapeDIYWalls(key):
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    # skip the header row
    for item in table[0].find_all("tr")[1:]:
        itemObject = {"name": item.findChildren("td")[0].text.strip("\n")}
        if item.findChildren("a")[1]['href']:
            itemObject["imageLink"] = item.findChildren("a")[1]['href']
        if item.findChildren("td")[2]:
            itemObject["materials"] = separateByBr(
                item.findChildren("td")[2]).strip("\n").split(",")
            itemObject["materialsImageLink"] = getImageLinks(
                item.findChildren("td")[2].find_all("img"))
        if item.findChildren("td")[3].findChildren("a"):
            itemObject["sizeLink"] = item.findChildren("td")[3].findChildren(
                "a")[0]['href']
        if item.findChildren("td")[4].text:
            itemObject["obtainedFrom"] = item.findChildren("td")[4].text.strip('\n')
        if item.findChildren("td")[5].text.strip().replace(",", ""):
            itemObject["price"] = int(
                item.findChildren("td")[5].text.strip().replace(",", ""))
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
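# The scrapers in this file call a few project helpers (separateByBr, getImageLinks,
# dumpData) that are not part of this excerpt. The definitions below are a minimal
# sketch of what they might look like, inferred from how they are called; they are
# assumptions, not the project's actual implementations. The list-based scrapers look
# up their page in a `urls` mapping and the dict-based ones in `URLS`; both are
# assumed to map a key like "bugs" to a wiki URL.
import json


def separateByBr(tag):
    # Assumed helper: turn each <br> inside a table cell into a comma so the
    # cell's text can later be split into a materials list.
    for br in tag.find_all("br"):
        br.replace_with(",")
    return tag.text


def getImageLinks(imgTags):
    # Assumed helper: collect image URLs, preferring the lazy-loaded "data-src".
    return [img.get("data-src") or img.get("src") for img in imgTags]


def dumpData(data, key):
    # Assumed helper: persist the scraped data to a JSON file named after the key.
    with open(key + ".json", "w") as outfile:
        json.dump(data, outfile, indent=2)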
def scrapeDIYOthers(key):
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "roundy"})
    items = {}
    for tr in table[0].find_all("tr")[1:]:
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
        }
        items[name] = item
        if tr.findChildren("a")[1]['href']:
            item["imageLink"] = tr.findChildren("a")[1]['href']
        if tr.findChildren("td")[2]:
            item["materials"] = separateByBr(
                tr.findChildren("td")[2]).strip("\n").split(",")
        if tr.findChildren("td")[2].find_all("img"):
            item["materialsImageLink"] = getImageLinks(
                tr.findChildren("td")[2].find_all("img"))
        if tr.findChildren("td")[3].img.get("data-src"):
            item["sizeImageLink"] = tr.findChildren("td")[3].img.get("data-src")
        if tr.findChildren("td")[4].text:
            item["obtainedFrom"] = tr.findChildren("td")[4].text.strip(
                "\n").splitlines()
        if tr.findChildren("td")[5]:
            item["price"] = int(
                tr.findChildren("td")[5].next.strip().replace(",", ""))
    dumpData(items, key)
    return items
def scrapeFish(key):  # same logic as scrapeBugs
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        for td in item.find_all("td"):
            itemInfo.append(td.next.strip())
        itemObject = {
            "name": item.findChildren("a")[0].text,
            "imageLink": item.findChildren("a")[1]['href'],
            "price": int(itemInfo[2]),
            "location": item.findChildren("td")[3].text.strip('\n').strip(),
            "shadowSize": itemInfo[4],  # specific to fish
            "time": item.findChildren("small")[0].text,
            "jan": avaiConverter(itemInfo[6]),
            "feb": avaiConverter(itemInfo[7]),
            "mar": avaiConverter(itemInfo[8]),
            "apr": avaiConverter(itemInfo[9]),
            "may": avaiConverter(itemInfo[10]),
            "jun": avaiConverter(itemInfo[11]),
            "jul": avaiConverter(itemInfo[12]),
            "aug": avaiConverter(itemInfo[13]),
            "sep": avaiConverter(itemInfo[14]),
            "oct": avaiConverter(itemInfo[15]),
            "nov": avaiConverter(itemInfo[16]),
            "dec": avaiConverter(itemInfo[17])
        }
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
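# avaiConverter is used by every bug/fish scraper but is not defined in this excerpt.
# A minimal sketch, assuming the wiki marks an available month with a checkmark and
# an unavailable one with "-"; the exact markers are an assumption.
def avaiConverter(marker):
    # Assumed helper: map a month-column cell marker to a boolean availability flag.
    return marker.strip() == "✓"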
def scrapeBugs(key):  # take url and return object containing bugs data
    url = URLS.get(key)
    # create soup object
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    # find the target table
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    # go through each tr in the table, ignoring the table header
    for tr in table[0].find_all("tr")[1:]:
        tableData = []
        # get rid of empty space
        for td in tr.find_all("td"):
            tableData.append(td.next.strip())
        # find data and save it into an object
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": int(tableData[2]),
            "location": tr.findChildren("td")[3].text.strip('\n').strip(),
            "time": tr.findChildren("small")[0].text,
            "seasons-northern-hemisphere": {
                "jan": avaiConverter(tableData[5]),
                "feb": avaiConverter(tableData[6]),
                "mar": avaiConverter(tableData[7]),
                "apr": avaiConverter(tableData[8]),
                "may": avaiConverter(tableData[9]),
                "jun": avaiConverter(tableData[10]),
                "jul": avaiConverter(tableData[11]),
                "aug": avaiConverter(tableData[12]),
                "sep": avaiConverter(tableData[13]),
                "oct": avaiConverter(tableData[14]),
                "nov": avaiConverter(tableData[15]),
                "dec": avaiConverter(tableData[16])
            },
            "seasons-southern-hemisphere": {
                # shift northern hemisphere by 6 months
                "jan": avaiConverter(tableData[11]),
                "feb": avaiConverter(tableData[12]),
                "mar": avaiConverter(tableData[13]),
                "apr": avaiConverter(tableData[14]),
                "may": avaiConverter(tableData[15]),
                "jun": avaiConverter(tableData[16]),
                "jul": avaiConverter(tableData[5]),
                "aug": avaiConverter(tableData[6]),
                "sep": avaiConverter(tableData[7]),
                "oct": avaiConverter(tableData[8]),
                "nov": avaiConverter(tableData[9]),
                "dec": avaiConverter(tableData[10])
            }
        }
        items[name] = item
    dumpData(items, key)
    # return for debugging
    return items
def onData(self, interest, data):
    """
    FileSync: handle an incoming sync data packet - verify it, refresh the
    roster of participants, and dispatch file updates or unsubscribe notices.
    """
    # TODO: Verify packet
    self.keyChain.verifyData(data, self.onVerified, self.onVerifyFailed)
    util.dump("Got data packet with name", data.getName().toUri())
    util.dumpData(data)

    content = fileSyncBuf_pb2.FileSync()
    content.ParseFromString(data.getContent().toRawStr())
    print("Type: " + str(content.dataType) + ", data: " + content.data)

    # Only act on messages that are less than two minutes old.
    if self.getNowMilliseconds() - content.timestamp * 1000.0 < 120000.0:
        # Use getattr because "from" is a reserved keyword.
        name = getattr(content, "from")
        prefix = data.getName().getPrefix(-2).toUri()
        sessionNo = int(data.getName().get(-2).toEscapedString())
        sequenceNo = int(data.getName().get(-1).toEscapedString())
        nameAndSession = name + str(sessionNo)

        l = 0
        # Update roster.
        while l < len(self.roster):
            entry = self.roster[l]
            tempName = entry[0:len(entry) - 10]
            tempSessionNo = int(entry[len(entry) - 10:])
            if (name != tempName and
                    content.dataType != fileSyncBuf_pb2.FileSync.UNSUBSCRIBE):
                l += 1
            else:
                if name == tempName and sessionNo > tempSessionNo:
                    self.roster[l] = nameAndSession
                break

        if l == len(self.roster):
            self.roster.append(nameAndSession)
            print(name + ": Subscribe")

        # Use getattr because "from" is a reserved keyword.
        if (content.dataType == fileSyncBuf_pb2.FileSync.UPDATE and
                not self.isRecoverySyncState and
                getattr(content, "from") != self.screenName):
            self.onRecievedFileUpdate(content)
        elif content.dataType == fileSyncBuf_pb2.FileSync.UNSUBSCRIBE:
            # leave message
            try:
                n = self.roster.index(nameAndSession)
                if name != self.screenName:
                    self.roster.pop(n)
                    print(name + ": Unsubscribe")
            except ValueError:
                pass
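# The freshness check above compares content.timestamp (seconds) against
# self.getNowMilliseconds(), which is not part of this excerpt. A minimal sketch of
# what it likely does, written as a module-level function for illustration (the real
# code exposes it on the class):
import time


def getNowMilliseconds():
    # Current wall-clock time in milliseconds, matching the 120-second window above.
    return time.time() * 1000.0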
def scrapeFish(key):  # same logic as scrapeBugs
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    for tr in table[0].find_all("tr")[1:]:
        tableData = []
        for td in tr.find_all("td"):
            tableData.append(td.next.strip())
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": int(tableData[2]),
            "location": tr.findChildren("td")[3].text.strip('\n').strip(),
            "shadowSize": tableData[4],  # specific to fish
            "time": tr.findChildren("small")[0].text,
            "seasons-northern-hemisphere": {
                "jan": avaiConverter(tableData[6]),
                "feb": avaiConverter(tableData[7]),
                "mar": avaiConverter(tableData[8]),
                "apr": avaiConverter(tableData[9]),
                "may": avaiConverter(tableData[10]),
                "jun": avaiConverter(tableData[11]),
                "jul": avaiConverter(tableData[12]),
                "aug": avaiConverter(tableData[13]),
                "sep": avaiConverter(tableData[14]),
                "oct": avaiConverter(tableData[15]),
                "nov": avaiConverter(tableData[16]),
                "dec": avaiConverter(tableData[17])
            },
            "seasons-southern-hemisphere": {
                "jan": avaiConverter(tableData[12]),
                "feb": avaiConverter(tableData[13]),
                "mar": avaiConverter(tableData[14]),
                "apr": avaiConverter(tableData[15]),
                "may": avaiConverter(tableData[16]),
                "jun": avaiConverter(tableData[17]),
                "jul": avaiConverter(tableData[6]),
                "aug": avaiConverter(tableData[7]),
                "sep": avaiConverter(tableData[8]),
                "oct": avaiConverter(tableData[9]),
                "nov": avaiConverter(tableData[10]),
                "dec": avaiConverter(tableData[11])
            }
        }
        items[name] = item
    dumpData(items, key)
    return items
def scrapeDIYTools(key):
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        for td in item.find_all("td"):
            if td.string is not None:
                itemInfo.append(td.next.strip())
            else:
                itemInfo.append(td.next)
        itemObject = {
            "name": item.findChildren("td")[0].a.text,
        }
        try:
            itemObject["imageLink"] = item.findChildren("a")[1]['href']
        except AttributeError:
            itemObject["imageLink"] = None
        try:
            itemObject["materials"] = separateByBr(
                item.findChildren("td")[2]).strip("\n").split(",")
        except AttributeError:
            itemObject["materials"] = []
        try:
            itemObject["materialsImageLink"] = getImageLinks(
                item.findChildren("td")[2].find_all("img"))
        except AttributeError:
            itemObject["materialsImageLink"] = []
        try:
            itemObject["sizeLink"] = itemInfo[3].img.get("data-src")
        except AttributeError:
            itemObject["sizeLink"] = None
        try:
            itemObject["obtainedFrom"] = itemInfo[4].text
        except AttributeError:
            itemObject["obtainedFrom"] = None
        try:
            itemObject["price"] = int(itemInfo[5].strip().replace(",", ""))
        except Exception:
            itemObject["price"] = None
        try:
            itemObject["isRecipeItem"] = avaiConverter(itemInfo[6])
        except Exception:
            itemObject["isRecipeItem"] = None
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
def scrapeVillagers(key):
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []
    for item in table[0].find_all("tr")[1:]:
        itemObject = {
            "name": item.findChildren("td")[0].a.text,
            "imageLink": item.findChildren("a")[1]['href'],
            "personality": item.findChildren("td")[2].text.strip("\n")[1:],
            "species": item.findChildren("td")[3].text.strip("\n")[1:],
            "birthday": item.findChildren("td")[4].text.strip("\n")[1:],
            "catchPhrase": item.findChildren("td")[5].text.strip("\n")[1:]
        }
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
def scrapeVillagers(key):
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    items = {}
    for tr in table[0].find_all("tr")[1:]:
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("td")[1].a['href'],
            "personality": tr.findChildren("td")[2].text.strip("\n")[1:],
            "species": tr.findChildren("td")[3].text.strip("\n")[1:],
            "birthday": tr.findChildren("td")[4].text.strip("\n")[1:],
            "catchPhrase": tr.findChildren("td")[5].text.strip("\n")[1:]
        }
        items[name] = item
    dumpData(items, key)
    return items
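# Each scraper looks its target page up in a module-level URL table keyed by data
# type. The entries and the driver below are hypothetical examples of that table and
# of how a caller might invoke the scrapers; the real URLS mapping and its keys are
# not part of this excerpt.
URLS = {
    "bugs": "https://animalcrossing.fandom.com/wiki/Bugs_(New_Horizons)",
    "villagers": "https://animalcrossing.fandom.com/wiki/Villager_list_(New_Horizons)",
}

if __name__ == "__main__":
    bugs = scrapeBugs("bugs")              # dict keyed by bug name
    villagers = scrapeVillagers("villagers")  # dict keyed by villager name
    print(len(bugs), "bugs and", len(villagers), "villagers scraped")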
def scrapeFossils(key):  # same logic as scrapeBugs and scrapeFish
    url = URLS.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    items = {}

    # Stand-alone fossils
    for tr in table[0].find_all("tr")[1:]:
        tableData = []
        for td in tr.find_all("td"):
            tableData.append(td.next.strip())
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": getPriceWithBellsString(tableData[2]),
            "isMultipart": False
        }
        items[name] = item

    # Multi-part fossils
    for tr in table[1].find_all("tr")[1:]:
        tableData = []
        tds = tr.find_all("td")
        if not tds:
            # a header row introduces the category for the rows that follow
            currentCategory = tr.findChildren("a")[0].text
            continue
        for td in tds:
            tableData.append(td.next.strip())
        name = tr.findChildren("td")[0].a.text
        item = {
            "name": name,
            "imageLink": tr.findChildren("a")[1]['href'],
            "price": getPriceWithBellsString(tableData[2]),
            "isMultipart": True,
            "category": currentCategory
        }
        items[name] = item

    dumpData(items, key)
    return items
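# getPriceWithBellsString is used by both fossil scrapers but not defined in this
# excerpt. A minimal sketch, assuming the wiki cell reads like "1,000 Bells" and the
# helper returns just the numeric value; the exact cell format is an assumption.
def getPriceWithBellsString(priceText):
    # Assumed helper: drop the " Bells" suffix and thousands separators, then parse.
    digits = priceText.replace(",", "").replace("Bells", "").strip()
    return int(digits) if digits.isdigit() else None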
def scrapeBugs(key):  # take url and return object containing bugs data
    url = urls.get(key)
    # create soup object
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    # find the target table
    table = soup.find_all("table", {"class": "sortable"})
    # contains all items
    itemList = []
    # ignore first row as it just contains labels to the data
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        # get rid of empty space
        for td in item.find_all("td"):
            itemInfo.append(td.next.strip())
        # find data and save it into an object
        itemObject = {
            "name": item.findChildren("td")[0].a.text,
            "imageLink": item.findChildren("a")[1]['href'],
            "price": int(itemInfo[2]),
            "location": item.findChildren("td")[3].text.strip('\n').strip(),
            "time": item.findChildren("small")[0].text,
            "jan": avaiConverter(itemInfo[5]),
            "feb": avaiConverter(itemInfo[6]),
            "mar": avaiConverter(itemInfo[7]),
            "apr": avaiConverter(itemInfo[8]),
            "may": avaiConverter(itemInfo[9]),
            "jun": avaiConverter(itemInfo[10]),
            "jul": avaiConverter(itemInfo[11]),
            "aug": avaiConverter(itemInfo[12]),
            "sep": avaiConverter(itemInfo[13]),
            "oct": avaiConverter(itemInfo[14]),
            "nov": avaiConverter(itemInfo[15]),
            "dec": avaiConverter(itemInfo[16])
        }
        itemList.append(itemObject)
    dumpData(itemList, key)
    return itemList
def scrapeFossils(key):  # same logic as scrapeBugs and scrapeFish
    url = urls.get(key)
    response = requests.get(url, timeout=5)
    soup = BeautifulSoup(response.content, "html.parser")
    table = soup.find_all("table", {"class": "sortable"})
    itemList = []

    # Stand-alone fossils
    for item in table[0].find_all("tr")[1:]:
        itemInfo = []
        for td in item.find_all("td"):
            itemInfo.append(td.next.strip())
        itemObject = {
            "name": item.findChildren("a")[0].text,
            "imageLink": item.findChildren("a")[1]['href'],
            "price": getPriceWithBellsString(itemInfo[2]),
            "isMultipart": False
        }
        itemList.append(itemObject)

    # Multi-part fossils
    for item in table[1].find_all("tr")[1:]:
        itemInfo = []
        tds = item.find_all("td")
        if not tds:
            category = item.findChildren("a")[0].text
            continue
        for td in tds:
            itemInfo.append(td.next.strip())
        itemObject = {
            "name": item.findChildren("a")[0].text,
            "imageLink": item.findChildren("a")[1]['href'],
            "price": getPriceWithBellsString(itemInfo[2]),
            "isMultipart": True,
            "category": category
        }
        itemList.append(itemObject)

    dumpData(itemList, key)
    return itemList
def onData(interest, data):
    util.dump("Data received: ", interest.getName().toUri())
    util.dumpData(data)
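# Both onData handlers follow the PyNDN callback signature. The sketch below shows
# how the stand-alone handler above might be wired up; the face setup, interest name,
# and requestExample function are illustrative assumptions, not taken from this excerpt.
from pyndn import Face, Name


def requestExample():
    face = Face()
    interestName = Name("/example/fileSync/update")  # hypothetical name
    face.expressInterest(
        interestName, onData,
        lambda interest: print("Timeout:", interest.getName().toUri()))
    # The application would then keep calling face.processEvents() in its event loop.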