Пример #1
0
def scrape(limit: int = 1) -> None:
    """Scrape MTG Goldfish Penny Dreadful deck listing pages and store the decks found.

    Listing pages are requested one at a time, starting at page 1 and going up
    to and including page ``limit``. Scraping stops early at the first page
    that contains no ``div.deck-tile`` elements.

    Args:
        limit: Highest listing page number to fetch.

    Raises:
        InvalidDataException: If ``scrape_created_date`` fails for a deck. The
            original exception is chained as the cause.
    """
    page = 1
    while page <= limit:
        # Short pause before every listing-page request.
        time.sleep(0.1)
        url = 'https://www.mtggoldfish.com/deck/custom/penny_dreadful?page={n}#online'.format(
            n=page)
        soup = BeautifulSoup(
            fetcher.internal.fetch(url, character_encoding='utf-8'),
            'html.parser')
        raw_decks = soup.find_all('div', {'class': 'deck-tile'})
        if not raw_decks:
            logger.warning(
                'No decks found in {url} so stopping.'.format(url=url))
            break
        for raw_deck in raw_decks:
            d = Container({'source': 'MTG Goldfish'})
            a = raw_deck.select_one('h2 > span.deck-price-online > a')
            # The numeric deck id is taken from the link target of the deck tile.
            d.identifier = re.findall(r'/deck/(\d+)#online', a.get('href'))[0]
            d.url = 'https://www.mtggoldfish.com/deck/{identifier}#online'.format(
                identifier=d.identifier)
            d.name = a.contents[0].strip()
            d.mtggoldfish_username = without_by(
                raw_deck.select_one(
                    'div.deck-tile-author').contents[0].strip())
            try:
                d.created_date = scrape_created_date(d)
            except InvalidDataException as e:
                msg = f'Got {e} trying to find a created_date in {d}, {raw_deck}'
                logger.error(msg)
                # Chain the cause so the original traceback is not lost.
                raise InvalidDataException(msg) from e
            # Longer pause before fetching the decklist of each deck.
            time.sleep(1)
            d.cards = scrape_decklist(d)
            err = vivify_or_error(d)
            if err:
                # Skip decks that vivify_or_error reports a problem for.
                logger.warning(err)
                continue
            deck.add_deck(d)
        page += 1
Пример #2
0
def normalize(d: Deck) -> str:
    """Return a cleaned-up, title-cased display name derived from ``d.original_name``.

    The name is lower-cased and passed through a fixed sequence of cleanup
    helpers. If expanding abbreviations changes the name (or the name is itself
    one of the ``ABBREVIATIONS`` values) the expanded form is used. Otherwise,
    unless the name is whitelisted, colour and archetype information from ``d``
    is folded into the name.

    Args:
        d: Deck providing ``original_name`` and, via ``get``, ``colors`` and
            ``archetype_name``.

    Returns:
        The normalized name after ``titlecase.titlecase``.

    Raises:
        InvalidDataException: If any step raises ``ValueError``. The original
            error is chained as the cause.
    """
    try:
        name = d.original_name.lower()
        name = replace_space_alternatives(name)
        name = remove_pd(name)
        name = remove_hashtags(name)
        name = remove_brackets(name)
        name = strip_leading_punctuation(name)
        unabbreviated = expand_common_abbreviations(name)
        if unabbreviated != name or name in ABBREVIATIONS.values():
            name = unabbreviated
        elif not whitelisted(name):
            # Only non-whitelisted, non-abbreviated names get colour/archetype treatment.
            name = add_colors_if_no_deckname(name, d.get('colors'))
            name = normalize_colors(name)
            name = add_archetype_if_just_colors(name, d.get('archetype_name'))
            name = remove_mono_if_not_first_word(name)
        name = ucase_trailing_roman_numerals(name)
        return titlecase.titlecase(name)
    except ValueError as e:
        raise InvalidDataException('Failed to normalize {d}'.format(d=repr(d))) from e
Пример #3
0
def insert_set(s) -> None:
    """Insert one row into ``set`` and one ``printing`` row per card in the set.

    Only properties flagged ``mtgjson`` in ``card.set_properties()`` and
    ``card.printing_properties()`` are written. The card list is passed through
    ``fix_bad_mtgjson_set_cards_data`` and ``fix_mtgjson_melded_cards_array``
    before the printings are inserted.

    Args:
        s: Mapping describing a set; read with ``get`` and expected to carry a
            ``cards`` list (defaults to an empty list).

    Raises:
        InvalidDataException: If ``try_find_card_id`` returns no id for a card.
    """
    set_columns = [
        name for name, prop in card.set_properties().items() if prop['mtgjson']
    ]
    sql = 'INSERT INTO `set` ({columns}) VALUES ({placeholders})'.format(
        columns=', '.join(set_columns),
        placeholders=', '.join(['%s'] * len(set_columns)))
    values = [date2int(s.get(database2json(name)), name) for name in set_columns]
    db().execute(sql, values)
    set_id = db().last_insert_rowid()

    set_cards = s.get('cards', [])
    fix_bad_mtgjson_set_cards_data(set_cards)
    fix_mtgjson_melded_cards_array(set_cards)

    # The printing statement is identical for every card, so build it once
    # instead of once per loop iteration.
    printing_columns = [
        name for name, prop in card.printing_properties().items()
        if prop['mtgjson']
    ]
    printing_sql = 'INSERT INTO printing (card_id, set_id, {columns}) VALUES (%s, %s, {placeholders})'.format(
        columns=', '.join(printing_columns),
        placeholders=', '.join(['%s'] * len(printing_columns)))

    for c in set_cards:
        _, card_id = try_find_card_id(c)
        if card_id is None:
            raise InvalidDataException("Can't find id for: '{n}': {ns}".format(
                n=c['name'], ns='; '.join(c.get('names', []))))
        cards_values = [card_id, set_id] + [
            c.get(database2json(name)) for name in printing_columns
        ]
        db().execute(printing_sql, cards_values)
Пример #4
0
def medal_winners(s: str) -> Dict[str, int]:
    """Map MTGO usernames to a finish value based on the medal image in each table row.

    Rows are pulled out of ``s`` with a regular expression, because the HTML of
    this page is so badly malformed that BeautifulSoup cannot really help with
    that part. Each row's third ``td`` is then parsed. Rows without an image in
    that cell are ignored, as are rows whose image is ``verified``.

    Args:
        s: Raw HTML of the page.

    Returns:
        Dict of aliased MTGO username to 1 (WINNER), 2 (SECOND), 3 (TOP_4) or 5 (TOP_8).

    Raises:
        InvalidDataException: If a row carries an image that is not recognised.
    """
    finishes = {}
    for row_html in re.findall('<tr style=">(.*?)</tr>', s, re.MULTILINE | re.DOTALL):
        cell = BeautifulSoup(row_html, 'html.parser').find_all('td')[2]
        if not cell.find('img'):
            continue
        mtgo_username = aliased(cell.a.contents[0])
        badge = re.sub(r'styles/Chandra/images/(.*?)\.png', r'\1', cell.img['src'])
        # Checked in order, so the first matching medal wins.
        medal_table = ((WINNER, 1), (SECOND, 2), (TOP_4, 3), (TOP_8, 5))
        finish = next(
            (place for medal, place in medal_table if badge == medal), None)
        if finish is not None:
            finishes[mtgo_username] = finish
        elif badge != 'verified':
            raise InvalidDataException(
                'Unknown player image `{img}`'.format(img=badge))
    return finishes
Пример #5
0
def get_source_id(source: str) -> int:
    """Look up the id of the ``source`` row whose name equals ``source``.

    Raises:
        InvalidDataException: If the lookup yields a falsy value (no such source).
    """
    found = db().value('SELECT id FROM source WHERE name = %s', [source])
    if found:
        return found
    raise InvalidDataException('Unknown source: `{source}`'.format(source=source))
Пример #6
0
 def lookup(gatherling_id: int) -> deck.Deck:
     """Return the deck stored in ``decks_by_identifier`` under ``gatherling_id``.

     Raises:
         InvalidDataException: If there is no entry for ``gatherling_id``. The
             underlying ``KeyError`` is chained as the cause.
     """
     try:
         return decks_by_identifier[gatherling_id]
     except KeyError as e:
         raise InvalidDataException("Unable to find deck with gatherling id '{0}'".format(gatherling_id)) from e
Пример #7
0
def update_database(new_date: datetime.datetime) -> None:
    """Wipe and repopulate the card tables from ``fetcher.all_sets()`` and ``fetcher.all_cards()``.

    Steps, in order:
      1. ``db().begin('update_database')``, clear ``scryfall_version``, drop
         ``_cache_card`` and delete all rows from the card-related tables.
      2. Call ``insert_set`` for every set and remember the result per set code.
      3. Walk every card printing, collecting value tuples for bulk INSERTs into
         ``card``, ``face``, ``card_color``, ``card_color_identity``,
         ``card_legality`` and ``printing``.
      4. Execute the bulk INSERTs, insert meld result faces, make sure the
         'Penny Dreadful' format exists, refresh bugged cards and PD legality.
      5. Record ``new_date`` in ``scryfall_version`` and
         ``db().commit('update_database')``.

    Args:
        new_date: Timestamp stored (via ``dtutil.dt2ts``) as ``last_updated``.

    Raises:
        InvalidDataException: If a printing references a set code that is not
            in the ``sets`` mapping built in step 2.
        KeyError: If a printing has a rarity missing from
            ``scryfall_to_internal_rarity`` or a colour missing from ``colors``.

    NOTE(review): several statements are assembled by interpolating values
    straight into SQL text (e.g. ``p['layout']``); presumably acceptable
    because the data comes from Scryfall rather than users — confirm.
    NOTE(review): if any of the ``*_values`` lists ends up empty, the matching
    statement is executed as ``INSERT ... VALUES ;`` — verify this cannot happen.
    """
    # pylint: disable=too-many-locals
    db().begin('update_database')
    db().execute('DELETE FROM scryfall_version')
    # In order to rebuild the card table, we must delete (and rebuild) all tables with a FK to it
    db().execute('DROP TABLE IF EXISTS _cache_card')
    db().execute("""
        DELETE FROM card_color;
        DELETE FROM card_color_identity;
        DELETE FROM card_legality;
        DELETE FROM card_bug;
        DELETE FROM face;
        DELETE FROM printing;
        DELETE FROM card;
        DELETE FROM `set`;
    """)

    # Set code -> whatever insert_set returns; used below as the printing's set_id.
    # NOTE(review): presumably the new set's id — verify insert_set actually returns it.
    sets = {}
    for s in fetcher.all_sets():
        sets[s['code']] = insert_set(s)

    every_card_printing = fetcher.all_cards()

    # Rarity name -> id, as stored in the rarity table.
    rarity_ids = {
        x['name']: x['id']
        for x in db().select('SELECT id, name FROM rarity;')
    }
    # Scryfall rarity string -> (internal rarity name, internal rarity id).
    scryfall_to_internal_rarity = {
        'common': ('Common', rarity_ids['Common']),
        'uncommon': ('Uncommon', rarity_ids['Uncommon']),
        'rare': ('Rare', rarity_ids['Rare']),
        'mythic': ('Mythic Rare', rarity_ids['Mythic Rare'])
    }

    # Strategy:
    # Iterate through all printings of each card, building several queries to be executed at the end.
    # If we hit a new card, add it to the queries for the several tables tracking cards:
    #      card, face, card_color, card_color_identity, printing
    # If it's a printing of a card we already have, just add to the printing query
    # We need to special case the result (melded) side of meld cards, due to their general weirdness.

    # Card name -> card id assigned in this run.
    cards: Dict[str, int] = {}

    meld_result_printings = []

    card_query = 'INSERT INTO `card` (id, layout) VALUES '
    card_values = []

    card_color_query = 'INSERT IGNORE INTO `card_color` (card_id, color_id) VALUES '
    card_color_values = []

    card_color_identity_query = 'INSERT IGNORE INTO `card_color_identity` (card_id, color_id) VALUES '
    card_color_identity_values = []

    # Face columns are the face properties flagged 'scryfall'.
    face_query = 'INSERT INTO `face` (card_id, position, '
    face_query += ', '.join(name
                            for name, prop in card.face_properties().items()
                            if prop['scryfall'])
    face_query += ') VALUES '
    face_values = []

    printing_query = 'INSERT INTO `printing` (card_id, set_id, '
    printing_query += 'system_id, rarity, flavor, artist, number, multiverseid, watermark, border, timeshifted, reserved, mci_number, rarity_id'
    printing_query += ') VALUES'
    printing_values = []

    colors_raw = db().select(
        'SELECT id, symbol FROM color GROUP BY name ORDER BY id;')
    # Upper-cased colour symbol -> colour id.
    colors = {c['symbol'].upper(): c['id'] for c in colors_raw}

    # Card ids are assigned sequentially here rather than by the database.
    next_card_id = 1

    card_legality_query = 'INSERT IGNORE INTO `card_legality` (card_id, format_id, legality) VALUES '
    card_legality_values = []

    for p in every_card_printing:
        # Exclude Little Girl because {hw} mana is a problem right now.
        if p['name'] == 'Little Girl':
            continue

        # Meld results are remembered for insert_meld_result_faces below, but still
        # fall through and are processed like any other printing.
        if is_meld_result(p):
            meld_result_printings.append(p)

        rarity, rarity_id = scryfall_to_internal_rarity[p['rarity']]

        try:
            set_id = sets[p['set']]
        except KeyError:
            raise InvalidDataException(
                f"We think we should have set {p['set']} but it's not in {sets} (from {p})"
            )

        # If we already have the card, all we need is to record the next printing of it
        if p['name'] in cards:
            card_id = cards[p['name']]
            printing_values.append(
                printing_value(p, card_id, set_id, rarity_id, rarity))
            continue

        # First time we see this card name: allocate an id and record the card itself.
        card_id = next_card_id
        next_card_id += 1

        cards[p['name']] = card_id
        card_values.append("({i},'{l}')".format(i=card_id, l=p['layout']))

        # Layouts in the first list yield one face, those in the second yield several;
        # any other layout adds no face rows.
        if p['layout'] in [
                'augment', 'emblem', 'host', 'leveler', 'meld', 'normal',
                'planar', 'saga', 'scheme', 'token', 'vanguard'
        ]:
            face_values.append(single_face_value(p, card_id))
        elif p['layout'] in [
                'double_faced_token', 'flip', 'split', 'transform'
        ]:
            face_values += multiple_faces_values(p, card_id)

        for color in p.get('colors', []):
            color_id = colors[color]
            card_color_values.append(f'({card_id}, {color_id})')

        for color in p.get('color_identity', []):
            color_id = colors[color]
            card_color_identity_values.append(f'({card_id}, {color_id})')

        for format_, status in p.get('legalities', {}).items():
            if status == 'not_legal':
                continue
            # Strictly speaking we could drop all this capitalizing and use what Scryfall sends us as the canonical name as it's just a holdover from mtgjson.
            format_id = get_format_id(format_.capitalize(), True)
            internal_status = status.capitalize()
            card_legality_values.append(
                f"({card_id}, {format_id}, '{internal_status}')")

        # NOTE(review): repeats the assignment made right after the id was allocated above.
        cards[p['name']] = card_id

        printing_values.append(
            printing_value(p, card_id, set_id, rarity_id, rarity))

    # Flush the accumulated value tuples, one bulk INSERT per table.
    card_query += ',\n'.join(card_values)
    card_query += ';'
    db().execute(card_query)

    card_color_query += ',\n'.join(card_color_values) + ';'
    db().execute(card_color_query)
    card_color_identity_query += ',\n'.join(card_color_identity_values) + ';'
    db().execute(card_color_identity_query)

    for p in meld_result_printings:
        insert_meld_result_faces(p, cards)

    printing_query += ',\n'.join(printing_values)
    printing_query += ';'
    db().execute(printing_query)

    face_query += ',\n'.join(face_values)
    face_query += ';'
    db().execute(face_query)

    card_legality_query += ',\n'.join(card_legality_values)
    card_legality_query += ';'
    db().execute(card_legality_query)

    # Create the current Penny Dreadful format.
    get_format_id('Penny Dreadful', True)
    update_bugged_cards()
    update_pd_legality()
    db().execute('INSERT INTO scryfall_version (last_updated) VALUES (%s)',
                 [dtutil.dt2ts(new_date)])
    db().commit('update_database')
Пример #8
0
def add_deck(params):
    """Store the deck described by ``params``, or add cards to it if it already exists.

    At least one of ``mtgo_username``, ``tappedout_username`` or
    ``mtggoldfish_username`` must be present. The deck is identified by
    ``source`` + ``identifier``. When a deck with that identity already
    exists, its cards are added via ``add_cards`` and the existing deck id is
    returned. Otherwise a new ``deck`` row is inserted, its cards added, and
    the freshly loaded deck is cached and returned.

    NOTE(review): the two paths return different things (a deck id versus the
    object from ``load_deck``) — confirm callers cope with both.

    Raises:
        InvalidDataException: If no username is present in ``params``.
    """
    username_keys = ('mtgo_username', 'tappedout_username', 'mtggoldfish_username')
    if not any(params.get(key) for key in username_keys):
        raise InvalidDataException(
            'Did not find a username in {params}'.format(params=params))
    person_id = get_or_insert_person_id(
        *[params.get(key) for key in username_keys])

    existing_id = get_deck_id(params['source'], params['identifier'])
    if existing_id:
        add_cards(existing_id, params['cards'])
        return existing_id

    # Fall back to "now" when the caller supplied no creation date.
    created_date = params.get('created_date') or time.time()
    archetype_id = get_archetype_id(params.get('archetype'))

    # Competition decks always carry a numeric record, defaulting to zero.
    if params.get('competition_id'):
        for result in ('wins', 'losses', 'draws'):
            if not params.get(result):
                params[result] = 0

    insert_sql = """INSERT INTO deck (
        created_date,
        updated_date,
        person_id,
        source_id,
        url,
        identifier,
        name,
        competition_id,
        archetype_id,
        resource_uri,
        featured_card,
        score,
        thumbnail_url,
        small_thumbnail_url,
        wins,
        losses,
        draws,
        finish,
        reviewed
    ) VALUES (
         IFNULL(%s, UNIX_TIMESTAMP()),  UNIX_TIMESTAMP(), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE
    )"""
    leading_values = [
        created_date,
        person_id,
        get_source_id(params['source']),
        params['url'],
        params['identifier'],
        params['name'],
        params.get('competition_id'),
        archetype_id,
    ]
    optional_keys = (
        'resource_uri', 'featured_card', 'score', 'thumbnail_url',
        'small_thumbnail_url', 'wins', 'losses', 'draws', 'finish',
    )
    values = leading_values + [params.get(key) for key in optional_keys]

    new_id = db().insert(insert_sql, values)
    add_cards(new_id, params['cards'])
    loaded = load_deck(new_id)
    prime_cache(loaded)
    return loaded
Пример #9
0
def add_deck(params: RawDeckDescription) -> Deck:
    """Store the deck described by ``params``, replacing its cards if it already exists.

    At least one of ``mtgo_username``, ``tappedout_username`` or
    ``mtggoldfish_username`` must be present. The deck is identified by
    ``source`` + ``identifier``.

    For an existing deck, the decklist hash is updated and the ``deck_card``
    rows are deleted and re-added between ``begin``/``commit`` of
    'replace_deck_cards'. For a new deck, the ``deck`` row and its cards are
    inserted, and the deck is loaded and cached, between ``begin``/``commit``
    of 'add_deck'. Either way the loaded deck is returned.

    Raises:
        InvalidDataException: If no username is present in ``params``.
    """
    username_keys = ('mtgo_username', 'tappedout_username', 'mtggoldfish_username')
    if not any(params.get(key) for key in username_keys):
        raise InvalidDataException(
            'Did not find a username in {params}'.format(params=params))
    person_id = get_or_insert_person_id(
        *[params.get(key) for key in username_keys])

    deck_id = get_deck_id(params['source'], params['identifier'])
    cards = params['cards']

    if deck_id:
        # Known deck: swap out its cards and refresh the stored hash.
        db().begin('replace_deck_cards')
        db().execute('UPDATE deck SET decklist_hash = %s WHERE id = %s',
                     [get_deckhash(cards), deck_id])
        db().execute('DELETE FROM deck_card WHERE deck_id = %s', [deck_id])
        add_cards(deck_id, cards)
        db().commit('replace_deck_cards')
        existing = load_deck(deck_id)
        prime_cache(existing)
        return existing

    # Fall back to "now" when the caller supplied no creation date.
    created_date = params.get('created_date') or time.time()
    archetype_id = get_archetype_id(params.get('archetype'))

    # Competition decks always carry a numeric record, defaulting to zero.
    if params.get('competition_id'):
        for result in ('wins', 'losses', 'draws'):
            if not params.get(result):
                params[result] = 0  # type: ignore

    insert_sql = """INSERT INTO deck (
        created_date,
        updated_date,
        person_id,
        source_id,
        url,
        identifier,
        name,
        competition_id,
        archetype_id,
        resource_uri,
        featured_card,
        score,
        thumbnail_url,
        small_thumbnail_url,
        finish,
        decklist_hash,
        reviewed
    ) VALUES (
         IFNULL(%s, UNIX_TIMESTAMP()), UNIX_TIMESTAMP(), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, FALSE
    )"""
    leading_values = [
        created_date,
        person_id,
        get_source_id(params['source']),
        params['url'],
        params['identifier'],
        params['name'],
        params.get('competition_id'),
        archetype_id,
    ]
    optional_keys = (
        'resource_uri', 'featured_card', 'score', 'thumbnail_url',
        'small_thumbnail_url', 'finish',
    )
    values = leading_values + [params.get(key) for key in optional_keys]
    values.append(get_deckhash(cards))

    db().begin('add_deck')
    deck_id = db().insert(insert_sql, values)
    add_cards(deck_id, cards)
    created = load_deck(deck_id)
    prime_cache(created)
    db().commit('add_deck')
    return created
Пример #10
0
def fail(key: str, val: Any, expected_type: type) -> InvalidDataException:
    """Build (without raising) the exception describing a value of the wrong type.

    Args:
        key: Name of the offending field.
        val: The value that was found.
        expected_type: The type that was expected for ``key``.

    Returns:
        An ``InvalidDataException`` whose message names the expected type, the
        key, the value and the value's actual type.
    """
    template = 'Expected a {expected_type} for {key}, got `{val}` ({actual_type})'
    message = template.format(
        expected_type=expected_type,
        key=key,
        val=val,
        actual_type=type(val),
    )
    return InvalidDataException(message)
Пример #11
0
def scryfall_last_updated() -> datetime.datetime:
    """Return the ``updated_at`` time of Scryfall's ``default_cards`` bulk-data entry.

    Raises:
        InvalidDataException: If the bulk-data listing has no ``default_cards`` entry.
    """
    bulk_data = fetch_tools.fetch_json('https://api.scryfall.com/bulk-data')
    default_cards = next(
        (entry for entry in bulk_data['data'] if entry['type'] == 'default_cards'),
        None,
    )
    if default_cards is None:
        raise InvalidDataException(f'Could not get the last updated date from Scryfall: {bulk_data}')
    return dtutil.parse_rfc3339(default_cards['updated_at'])
Пример #12
0
def season_num(code_to_look_for: str) -> int:
    """Return the 1-based position of ``code_to_look_for`` within ``SEASONS``.

    Raises:
        InvalidDataException: If the code is not present in ``SEASONS``. The
            underlying lookup error is chained as the cause.
    """
    try:
        return SEASONS.index(code_to_look_for) + 1
    # Sequence.index signals a missing element with ValueError, not KeyError;
    # KeyError is still caught in case SEASONS is a container that raises it.
    except (KeyError, ValueError) as c:
        raise InvalidDataException('I did not find the season code (`{code}`) in the list of seasons ({seasons}) and I am confused.'.format(code=code_to_look_for, seasons=','.join(SEASONS))) from c
Пример #13
0
def parse_line(line: str) -> Tuple[int, str]:
    """Split a decklist line of the form ``<count> <name>`` into its two parts.

    Args:
        line: Text that must start with one or more digits followed by whitespace.

    Returns:
        ``(count, name)`` where ``name`` is the rest of the line after the whitespace.

    Raises:
        InvalidDataException: If the line does not start with a number and whitespace.
    """
    parsed = re.match(r'(\d+)\s+(.*)', line)
    if parsed is not None:
        return (int(parsed.group(1)), parsed.group(2))
    raise InvalidDataException('No number specified with `{line}`'.format(line=line))