Example #1
def parse_row(row):
    # `thumbnails` is expected to be a module-level BeautifulSoup node that
    # holds the ship thumbnail gallery; it is not defined in this snippet.
    tds = [td.text for td in row.select("td")]
    img = thumbnails.select_one(f'a[title="{tds[1]}"] img')
    data = {
        'id': tds[0],
        'name': tds[1],
        'rarity': tds[2],
        'type': tds[3],
        'affiliation': tds[4],
        'stats': {
            'firepower': tds[5],
            'health': tds[6],
            'antiAir': tds[7],
            'speed': tds[8],
            'airPower': tds[9],
            'torpedo': tds[10]
        },
        'url': build_url(row.select_one('a')['href']),
        # Prefer the first srcset candidate when one exists, otherwise fall back to src.
        'thumbnail': build_url(
            img['srcset'].split()[0] if img.has_attr('srcset') else img['src'])
    }

    return data
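For context, parse_row takes one data row (<tr>) of the ship list table and also depends on a module-level thumbnails node. A minimal driver might look like the sketch below; the '.gallery' selector for the thumbnail block is an assumption, not something taken from the original source.

from requests import get
from bs4 import BeautifulSoup

page = BeautifulSoup(get('https://azurlane.koumakan.jp/List_of_Ships').text, 'lxml')
thumbnails = page.select_one('.gallery')  # assumed container for the thumbnail links
rows = page.select('.mw-parser-output .wikitable tr')
ships = [parse_row(row) for row in rows if not row.find('th')]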
Example #2
def extract_pictures(html):
    icon = html.select_one('img')
    chibi = html.select_one('#talkingchibi img')
    return {
        # One entry per skin tab in the art gallery.
        'images': [
            extract_skin(tab)
            for tab in html.select('div.shiparttabbernew .tabbertab')
        ],
        'icon': build_url(icon['src']) if icon else '',
        'chibi': build_url(chibi['src']) if chibi else ''
    }
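A minimal way to exercise extract_pictures is to feed it the parsed gallery page of a single ship; the /Laffey/Gallery URL below is only an illustrative guess at the page layout, not taken from the original source.

from requests import get
from bs4 import BeautifulSoup

gallery = BeautifulSoup(get('https://azurlane.koumakan.jp/Laffey/Gallery').text, 'lxml')
pictures = extract_pictures(gallery)
print(pictures['icon'], len(pictures['images']))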
Example #3
def extract_skin(tab):
    # The second-to-last srcset token is the URL of the last (largest) candidate.
    image_path = tab.select_one('img')['srcset'].split(' ')[-2]
    return {'name': tab['title'], 'url': build_url(image_path)}
Example #4
from requests import get
from shared import save_fixture, build_url
from bs4 import BeautifulSoup

# Collect the per-category equipment pages linked from the index list.
html = get('https://azurlane.koumakan.jp/Equipment_List').text
fp = BeautifulSoup(html, 'lxml')
category_urls = [link['href'] for link in fp.find('ul').select('a')]

for category_url in category_urls:
    category_html = get(build_url(category_url)).text
    table = BeautifulSoup(category_html, 'lxml').select_one('.tabbertab table')
    # Skip header rows and deduplicate: several rows can link to the same item page.
    paths = {
        row.find('a')['href']
        for row in table.find_all('tr') if not row.find('th')
    }

    for path in paths:
        name = path[1:]
        # Skip links into the MediaWiki backend (e.g. red links to missing pages).
        if 'w/index.php?' in name:
            continue
        print(name)
        save_fixture('equipment', name, get(build_url(path)).text)
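Example #4 and Example #6 both import build_url and save_fixture from a local shared module that is not shown on this page. Judging only from how the helpers are called, they could be implemented roughly like the sketch below; this is an assumption, not the original code.

from pathlib import Path
from urllib.parse import urljoin

BASE_URL = 'https://azurlane.koumakan.jp'


def build_url(path):
    # Resolve a wiki-relative href such as '/Laffey' against the site root.
    return urljoin(BASE_URL, path)


def save_fixture(category, name, html):
    # Store the raw page HTML under fixtures/<category>/<name>.html so the
    # parsers above can be developed and re-run without hitting the wiki.
    target = Path('fixtures') / category
    target.mkdir(parents=True, exist_ok=True)
    (target / f'{name}.html').write_text(html, encoding='utf-8')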
Example #5
def extract_picture(html):
    img = html.find('img')
    # Prefer the largest srcset candidate; fall back to the plain src attribute.
    path = img['srcset'].split(' ')[-2] if img.has_attr('srcset') else img['src']
    return build_url(path)
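Like extract_skin above, extract_picture leans on the shape of the srcset attribute, where candidates are listed as 'url descriptor, url descriptor, ...'; splitting on single spaces and taking the second-to-last token therefore yields the URL of the last, typically largest, candidate. A quick illustration with a made-up value:

srcset = '/images/thumb/a/a0/Ship.png/144px-Ship.png 1.5x, /images/a/a0/Ship.png 2x'
print(srcset.split(' ')[-2])  # prints /images/a/a0/Ship.png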
Example #6
from requests import get
from shared import save_fixture, build_url
from bs4 import BeautifulSoup

# Walk the master ship list and save every ship's full page as a local fixture.
html = get('https://azurlane.koumakan.jp/List_of_Ships').text
fp = BeautifulSoup(html, 'lxml')
rows = fp.select('.mw-parser-output .wikitable tr')
urls = [build_url(row.find('a')['href']) for row in rows if not row.find('th')]

for url in urls:
    name = url.split('/')[-1]
    print(name)
    save_fixture('ships_long', name, get(url).text)