def scrape(limit: int = 255) -> None:
    """Import decks from the MTG Goldfish Penny Dreadful custom-deck listing.

    Listing pages are requested in order, starting at page 1. The walk ends
    after ``limit`` pages, or earlier at the first page that contains no
    ``div.deck-tile`` elements. Every tile is turned into a ``Container``
    and handed to ``deck.add_deck`` unless it is rejected (each rejection is
    logged as a warning) for one of these reasons:

    * ``decklist.vivify`` raises ``InvalidDataException`` for its cards,
    * none of the formats returned by ``legality.legal_formats`` contains
      'Penny Dreadful',
    * its card collection has length zero.

    Args:
        limit: Highest page number that will be requested.
    """
    for page in range(1, limit + 1):
        # Pause before each listing-page request.
        time.sleep(0.1)
        url = 'https://www.mtggoldfish.com/deck/custom/penny_dreadful?page={n}#online'.format(n=page)
        markup = fetcher.internal.fetch(url, character_encoding='utf-8')
        tiles = BeautifulSoup(markup, 'html.parser').find_all('div', {'class': 'deck-tile'})
        if not tiles:
            logger.warning('No decks found in {url} so stopping.'.format(url=url))
            break

        for tile in tiles:
            entry = Container({'source': 'MTG Goldfish'})
            link = tile.select_one('h2 > span.deck-price-online > a')
            entry.identifier = re.findall(r'/deck/(\d+)#online', link.get('href'))[0]
            entry.url = 'https://www.mtggoldfish.com/deck/{identifier}#online'.format(identifier=entry.identifier)
            entry.name = link.contents[0].strip()

            # The author text may carry a leading "by "; keep only what follows it.
            author = tile.select_one('div.deck-tile-author').contents[0].strip()
            author_match = re.match(r'^(by )?(.*)$', author)
            entry.mtggoldfish_username = author_match.group(2) if author_match else author

            entry.created_date = scrape_created_date(entry)
            # Longer pause between the two per-deck fetches.
            time.sleep(1)
            entry.cards = scrape_decklist(entry)

            # Card names on the site are free text, so vivify may not recognise them.
            try:
                vivified = decklist.vivify(entry.cards)
            except InvalidDataException as e:
                logger.warning(
                    'Rejecting decklist of deck with identifier {identifier} because of {e}'
                    .format(identifier=entry.identifier, e=e))
                continue

            penny_dreadful_formats = [
                fmt for fmt in legality.legal_formats(vivified)
                if 'Penny Dreadful' in fmt
            ]
            if not penny_dreadful_formats:
                logger.warning(
                    'Rejecting deck with identifier {identifier} because it is not legal in any PD formats.'
                    .format(identifier=entry.identifier))
                continue

            if len(entry.cards) == 0:
                logger.warning(
                    'Rejecting deck with identifier {identifier} because it has no cards.'
                    .format(identifier=entry.identifier))
                continue

            deck.add_deck(entry)
def scrape_url(url):
    """Build a deck from ``url``, check it is Penny Dreadful legal, and store it.

    A trailing '/' is appended to ``url`` when missing. The slug is the
    element at index 2 of the URL path split on '/', e.g. a path of
    '/a/b/' yields 'b'. Deck details come from ``fetch_deck_details`` when
    ``is_authorised()`` is truthy and from ``parse_printable`` otherwise,
    and are then passed through ``set_values``.

    Returns:
        Whatever ``deck.add_deck`` returns for the assembled deck.

    Raises:
        IndexError: the URL path has too few '/'-separated parts to hold a slug.
        InvalidDataException: 'Penny Dreadful' is not among the formats
            reported by ``legality.legal_formats``.
    """
    full_url = url if url.endswith('/') else url + '/'
    path_parts = urllib.parse.urlparse(full_url).path.split('/')
    details = {'slug': path_parts[2], 'url': full_url}

    # Pick the detail source according to whether we are authorised.
    load_details = fetch_deck_details if is_authorised() else parse_printable
    details.update(load_details(details))
    details = set_values(details)

    playable = decklist.vivify(details['cards'])
    if 'Penny Dreadful' in legality.legal_formats(playable):
        return deck.add_deck(details)
    raise InvalidDataException('Deck is not legal in Penny Dreadful')
def do_validation(self):
    """Validate a league deck submission, recording problems in ``self.errors``.

    Reads ``self.mtgo_username``, ``self.name`` and ``self.decklist``.
    Checks, in order:

    1. The username is non-blank and ``active_decks_by`` returns nothing
       truthy for it.
    2. The deck name is non-blank. When it is, ``source``,
       ``competition_id``, ``identifier`` and ``url`` are filled in.
    3. The decklist (stored back stripped) is non-blank, parses (as XML when
       it starts with '<?xml', as text otherwise), can be vivified, and is
       reported legal in 'Penny Dreadful'.

    Each failure stores a message in ``self.errors`` under 'mtgo_username',
    'name' or 'decklist'. Nothing is raised for invalid input and nothing is
    returned.
    """
    # Strip before the emptiness test so that a whitespace-only username is
    # rejected here rather than being looked up as ''.
    username = self.mtgo_username.strip()
    if not username:
        self.errors['mtgo_username'] = "******"
    elif active_decks_by(username):
        self.errors['mtgo_username'] = "******"

    if not self.name.strip():
        self.errors['name'] = 'Deck Name is required'
    else:
        self.source = 'League'
        self.competition_id = db().value(active_competition_id_query())
        self.identifier = identifier(self)
        self.url = url_for('competitions', competition_id=self.competition_id)

    self.decklist = self.decklist.strip()
    if not self.decklist:
        self.errors['decklist'] = 'Decklist is required'
        return

    # cards stays None when parsing fails, which skips the legality check.
    self.cards = None
    is_dek = self.decklist.startswith('<?xml')
    try:
        if is_dek:
            self.cards = decklist.parse_xml(self.decklist)
        else:
            self.cards = decklist.parse(self.decklist)
    except InvalidDataException as e:
        if is_dek:
            # The .dek message is fixed text; the parser detail is not shown.
            self.errors['decklist'] = 'Unable to read .dek decklist. Try exporting from MTGO as Text and pasting the result.'
        else:
            self.errors['decklist'] = '{specific}. Try exporting from MTGO as Text and pasting the result.'.format(specific=str(e))
    if self.cards is None:
        return

    try:
        vivified = decklist.vivify(self.cards)
        errors = {}
        if 'Penny Dreadful' not in legality.legal_formats(vivified, None, errors):
            self.errors['decklist'] = 'Deck is not legal in Penny Dreadful - {error}'.format(
                error=errors.get('Penny Dreadful'))
    except InvalidDataException as e:
        self.errors['decklist'] = str(e)
def scrape_url(url: str) -> deck.Deck:
    """Build a deck from ``url``, check it is Penny Dreadful legal, and store it.

    A trailing '/' is appended to ``url`` when missing. The slug is the
    element at index 2 of the URL path split on '/', e.g. a path of
    '/a/b/' yields 'b'. Deck details come from ``fetch_deck_details`` when
    ``is_authorised()`` is truthy and from ``parse_printable`` otherwise,
    and are then passed through ``set_values``.

    Args:
        url: Address of the deck page.

    Returns:
        The result of ``deck.add_deck`` for the assembled deck.

    Raises:
        IndexError: the URL path has too few '/'-separated parts to hold a slug.
        InvalidDataException: 'Penny Dreadful' is not among the formats
            reported by ``legality.legal_formats``. The message carries the
            'Penny Dreadful' entry of the errors dict passed to that call.
    """
    canonical_url = url if url.endswith('/') else url + '/'
    path_parts = urllib.parse.urlparse(canonical_url).path.split('/')
    record: DeckType = {'slug': path_parts[2], 'url': canonical_url}

    if is_authorised():
        extra = fetch_deck_details(record)
    else:
        extra = parse_printable(record)
    record.update(extra)
    record = set_values(record)

    vivified = decklist.vivify(record['cards'])
    errors: Dict[str, str] = {}
    legal_in = legality.legal_formats(vivified, None, errors)
    if 'Penny Dreadful' in legal_in:
        return deck.add_deck(record)

    # Dump the rejected card data to stdout before raising.
    print(repr(record['cards']))
    raise InvalidDataException(
        'Deck is not legal in Penny Dreadful - {error}'.format(error=errors.get('Penny Dreadful')))
def vivify_deck(self):
    """Run ``decklist.vivify`` on ``self.cards`` and keep the result in ``self.deck``.

    If vivify raises ``InvalidDataException``, ``self.deck`` is left
    untouched and the exception text is stored in
    ``self.errors['decklist']`` instead.
    """
    try:
        vivified = decklist.vivify(self.cards)
    except InvalidDataException as problem:
        self.errors['decklist'] = str(problem)
    else:
        self.deck = vivified