Пример #1
0
def scrape(limit: int = 255) -> None:
    """Scrape Penny Dreadful custom decks from the MTG Goldfish listing pages.

    Listing pages 1 through ``limit`` are fetched in order; scraping stops
    early at the first page that contains no ``div.deck-tile`` elements.
    Every tile is turned into a ``Container``, its created date and decklist
    are fetched, and the deck is stored with ``deck.add_deck`` unless it is
    rejected because the decklist cannot be vivified, it is not legal in any
    format whose name contains 'Penny Dreadful', or it has no cards.

    Args:
        limit: Highest listing page number to request.
    """
    for page in range(1, limit + 1):
        # Short pause before each listing-page request.
        time.sleep(0.1)
        url = 'https://www.mtggoldfish.com/deck/custom/penny_dreadful?page={n}#online'.format(n=page)
        markup = fetcher.internal.fetch(url, character_encoding='utf-8')
        tiles = BeautifulSoup(markup, 'html.parser').find_all('div', {'class': 'deck-tile'})
        if not tiles:
            logger.warning('No decks found in {url} so stopping.'.format(url=url))
            break
        for tile in tiles:
            entry = Container({'source': 'MTG Goldfish'})
            link = tile.select_one('h2 > span.deck-price-online > a')
            entry.identifier = re.findall(r'/deck/(\d+)#online', link.get('href'))[0]
            entry.url = 'https://www.mtggoldfish.com/deck/{identifier}#online'.format(identifier=entry.identifier)
            entry.name = link.contents[0].strip()
            # The author text may carry a leading "by "; keep only the name.
            author = tile.select_one('div.deck-tile-author').contents[0].strip()
            by_match = re.match(r'^(by )?(.*)$', author)
            entry.mtggoldfish_username = by_match.group(2) if by_match else author
            entry.created_date = scrape_created_date(entry)
            # Longer pause between the two per-deck requests.
            time.sleep(1)
            entry.cards = scrape_decklist(entry)
            try:
                vivified = decklist.vivify(entry.cards)
            except InvalidDataException as exc:
                # MTG Goldfish does not validate card names, so some decks
                # fail here with names like 'Stroke of Genuineness'.
                logger.warning(
                    'Rejecting decklist of deck with identifier {identifier} because of {e}'
                    .format(identifier=entry.identifier, e=exc))
                continue
            if not any('Penny Dreadful' in fmt for fmt in legality.legal_formats(vivified)):
                logger.warning(
                    'Rejecting deck with identifier {identifier} because it is not legal in any PD formats.'
                    .format(identifier=entry.identifier))
                continue
            if len(entry.cards) == 0:
                logger.warning(
                    'Rejecting deck with identifier {identifier} because it has no cards.'
                    .format(identifier=entry.identifier))
                continue
            deck.add_deck(entry)
Пример #2
0
def scrape_url(url):
    """Import the deck found at ``url`` and store it if it is Penny Dreadful legal.

    A trailing slash is appended to ``url`` when missing, and the third
    '/'-separated piece of the URL path (index 2) is used as the deck slug.
    Deck details come from ``fetch_deck_details`` when ``is_authorised()`` is
    truthy, otherwise from ``parse_printable``; the result is passed through
    ``set_values`` before its cards are vivified and checked for legality.

    Returns:
        Whatever ``deck.add_deck`` returns for the assembled deck.

    Raises:
        IndexError: If the URL path has fewer than three '/'-separated pieces.
        InvalidDataException: If the deck is not legal in Penny Dreadful.
    """
    normalized = url if url.endswith('/') else url + '/'
    slug = urllib.parse.urlparse(normalized).path.split('/')[2]
    raw_deck = {'slug': slug, 'url': normalized}
    details = fetch_deck_details(raw_deck) if is_authorised() else parse_printable(raw_deck)
    raw_deck.update(details)
    raw_deck = set_values(raw_deck)
    vivified = decklist.vivify(raw_deck['cards'])
    if 'Penny Dreadful' in legality.legal_formats(vivified):
        return deck.add_deck(raw_deck)
    raise InvalidDataException('Deck is not legal in Penny Dreadful')
Пример #3
0
 def do_validation(self):
     """Validate the submitted deck and record any problems in ``self.errors``.

     Checks performed, in order:

     * ``mtgo_username`` must be non-blank, and ``active_decks_by`` must
       return a falsy value for the stripped username.
     * ``name`` must be non-blank. When it is, ``source``,
       ``competition_id``, ``identifier`` and ``url`` are set on ``self``.
     * ``decklist`` must be non-blank, must parse (with
       ``decklist.parse_xml`` when it starts with ``<?xml``, otherwise with
       ``decklist.parse``) and must be legal in Penny Dreadful.

     Side effects: ``self.decklist`` is replaced by its stripped value, and
     when it is non-blank ``self.cards`` is set to the parsed cards (or left
     as ``None`` if parsing raised ``InvalidDataException``).
     """
     # Strip before the emptiness check so a whitespace-only username is
     # reported as missing instead of being looked up as ''.
     username = self.mtgo_username.strip()
     if not username:
         self.errors['mtgo_username'] = "******"
     elif active_decks_by(username):
         self.errors['mtgo_username'] = "******"

     if not self.name.strip():
         self.errors['name'] = 'Deck Name is required'
     else:
         self.source = 'League'
         self.competition_id = db().value(active_competition_id_query())
         self.identifier = identifier(self)
         self.url = url_for('competitions', competition_id=self.competition_id)

     self.decklist = self.decklist.strip()
     if not self.decklist:
         self.errors['decklist'] = 'Decklist is required'
         return

     self.cards = None
     if self.decklist.startswith('<?xml'):
         try:
             self.cards = decklist.parse_xml(self.decklist)
         except InvalidDataException:
             # This message has no placeholder, so the parser's own message
             # is not included in it.
             self.errors['decklist'] = 'Unable to read .dek decklist. Try exporting from MTGO as Text and pasting the result.'
     else:
         try:
             self.cards = decklist.parse(self.decklist)
         except InvalidDataException as e:
             self.errors['decklist'] = '{specific}. Try exporting from MTGO as Text and pasting the result.'.format(
                 specific=str(e))

     if self.cards is None:
         # Parsing failed (or produced nothing); the legality check needs cards.
         return

     try:
         vivified = decklist.vivify(self.cards)
         # legal_formats is handed this dict; the entry for 'Penny Dreadful'
         # is read back below to build the error message.
         errors = {}
         if 'Penny Dreadful' not in legality.legal_formats(vivified, None, errors):
             self.errors['decklist'] = 'Deck is not legal in Penny Dreadful - {error}'.format(
                 error=errors.get('Penny Dreadful'))
     except InvalidDataException as e:
         self.errors['decklist'] = str(e)
Пример #4
0
def scrape_url(url: str) -> deck.Deck:
    """Import the deck found at ``url`` and store it if it is Penny Dreadful legal.

    A trailing slash is appended to ``url`` when missing, and the third
    '/'-separated piece of the URL path (index 2) is used as the deck slug.
    Deck details come from ``fetch_deck_details`` when ``is_authorised()`` is
    truthy, otherwise from ``parse_printable``; the result is passed through
    ``set_values`` before its cards are vivified and checked for legality.

    Returns:
        The result of ``deck.add_deck`` for the assembled deck.

    Raises:
        IndexError: If the URL path has fewer than three '/'-separated pieces.
        InvalidDataException: If the deck is not legal in Penny Dreadful; the
            message includes the 'Penny Dreadful' entry of the errors dict
            handed to ``legality.legal_formats``.
    """
    normalized = url if url.endswith('/') else url + '/'
    slug = urllib.parse.urlparse(normalized).path.split('/')[2]
    raw_deck: DeckType = {'slug': slug, 'url': normalized}
    details = fetch_deck_details(raw_deck) if is_authorised() else parse_printable(raw_deck)
    raw_deck.update(details)
    raw_deck = set_values(raw_deck)
    vivified = decklist.vivify(raw_deck['cards'])
    reasons: Dict[str, str] = {}
    if 'Penny Dreadful' in legality.legal_formats(vivified, None, reasons):
        return deck.add_deck(raw_deck)
    # Dump the offending cards to stdout before reporting the failure.
    print(repr(raw_deck['cards']))
    raise InvalidDataException('Deck is not legal in Penny Dreadful - {error}'.format(error=reasons.get('Penny Dreadful')))
Пример #5
0
 def vivify_deck(self):
     """Vivify ``self.cards`` into ``self.deck``.

     If ``decklist.vivify`` raises ``InvalidDataException``, ``self.deck`` is
     not assigned and the exception text is stored under
     ``self.errors['decklist']`` instead.
     """
     try:
         vivified = decklist.vivify(self.cards)
     except InvalidDataException as exc:
         self.errors['decklist'] = str(exc)
     else:
         self.deck = vivified