示例#1
0
def parse_thursday(thday):
    """Scrape one Thursday's box-office table and store every row.

    The value of ``get_thursday(thday['thursday'])`` is printed and then
    passed to ``get_page``. In the first ``section.events__table table``
    element of the returned document, every ``tr`` after the first one is
    turned into a ``film`` dict and a ``boxoffice`` dict, which are handed to
    ``save_film`` and ``save_thursday_boxoffice`` respectively.

    Args:
        thday: Mapping whose ``'thursday'`` value exposes ``.date()``
            (presumably a ``datetime`` -- confirm against the caller).

    Raises:
        IndexError: If the document has no ``section.events__table table``.
    """
    target = get_thursday(thday['thursday'])
    print(target)
    # Wait 4 seconds before fetching (presumably request throttling).
    time.sleep(4)
    # The second element of the pair is not inspected by this function.
    d, _err = get_page(target)
    tables = d.select('section.events__table table')
    # The date string is identical for every row, so build it once.
    day = str(thday['thursday'].date())
    # The first <tr> is skipped (presumably the header row).
    for r in tables[0].select('tr')[1:]:
        cs = r.select('td')
        film = {}
        boxoffice = {'thursday': day}
        for idx, c in enumerate(cs):
            print(idx, c)
            if idx == 0:
                boxoffice['pos'] = c.text
                print('\tpos: ', boxoffice['pos'])
            elif idx == 1:
                link = c.select_one('a')
                film['title'] = c.text
                print('\ttitle: ', film['title'])
                film['page'] = link['href']
                print('\tpage: ', film['page'])
                # The first value of the link's rel attribute is the film id.
                film['id'] = link['rel'][0]
                boxoffice['film'] = film['id']
                print('\tname: ', film['id'])
            elif idx == 2:
                boxoffice['distributor'] = c.text
                print('\tdistributor: ' + boxoffice['distributor'])
            elif idx == 3:
                boxoffice['thursday_rur'] = num(c.text)
                # str() keeps the concatenation valid even when num() returns
                # a number instead of a string.
                print('\tthursdayRur: ' + str(boxoffice['thursday_rur']))
        save_film(film)
        save_thursday_boxoffice(boxoffice)
示例#2
0
def page(page):
    """Collect the actor names found on a movie page.

    Sleeps for 4 seconds, fetches ``get_movie(page)`` through ``get_page``
    and reads every ``span[itemprop=actor]`` element. Each stripped name is
    stored via ``save_person(Person(-1, name))`` and also returned.

    Args:
        page: Value forwarded to ``get_movie``.

    Returns:
        List of stripped actor names, in document order.
    """
    sleep(4)
    doc, _err = get_page(get_movie(page))
    names = []
    for node in doc.select('span[itemprop=actor]'):
        name = node.text.strip()
        save_person(Person(-1, name))
        names.append(name)
    return names
示例#3
0
def parse_weekend(week):
    """Scrape one weekend's box-office table and store every row.

    The value of ``get_weekend(week['weekend'])`` is printed and then fetched
    with ``get_page``. Every ``table#krestable tr`` after the first one is
    turned into a ``film`` dict and a ``boxoffice`` dict, which are handed to
    ``save_film`` and ``save_weekend_boxoffice`` respectively.

    Args:
        week: Mapping whose ``'weekend'`` value exposes ``.date()``
            (presumably a ``datetime`` -- confirm against the caller).
    """
    print(get_weekend(week['weekend']))
    time.sleep(4)
    d, _err = get_page(get_weekend(week['weekend']))

    # Cell index -> (debug label, boxoffice key) for cells stored via num().
    numeric_cells = {
        6: ('\tweekendRur: ', 'weekend_rur'),
        8: ('\tscreens: ', 'screens'),
        10: ('\tdays: ', 'days'),
        11: ('\ttotalRur: ', 'total_rur'),
        12: ('\tspectaculars: ', 'spectaculars'),
    }

    table_rows = d.select('table#krestable tr')
    # The first <tr> is skipped (presumably the header row).
    for table_row in table_rows[1:]:
        cells = table_row.select('td')
        film = {}
        boxoffice = {'weekend': str(week['weekend'].date())}
        for position, cell in enumerate(cells):
            print(position, cell)
            if position == 1:
                boxoffice['pos'] = cell.text
                print('\tpos: ', boxoffice['pos'])
            elif position == 3:
                print('\ttitle: ', cell.text)
                film['title'] = cell.text
                anchor = cell.select_one('a')
                print('\tpage: ', anchor['href'])
                film['page'] = anchor['href']
                print('\tname: ', anchor['rel'])
                # The first value of the link's rel attribute is the film id.
                film['id'] = anchor['rel'][0]
                boxoffice['film'] = anchor['rel'][0]
            elif position == 4:
                print('\toriginal: ' + cell.text)
                film['original'] = cell.text
            elif position == 5:
                print('\tdistributor: ' + cell.text)
                boxoffice['distributor'] = cell.text
            elif position in numeric_cells:
                label, key = numeric_cells[position]
                print(label + cell.text)
                boxoffice[key] = num(cell.text)
        save_film(film)
        save_weekend_boxoffice(boxoffice)
示例#4
0
                print('\tscreens: ' + c.text)
                boxoffice['screens'] = num(c.text)
            if idx == 10:
                print('\tdays: ' + c.text)
                boxoffice['days'] = num(c.text)
            if idx == 11:
                print('\ttotalRur: ' + c.text)
                boxoffice['total_rur'] = num(c.text)
            if idx == 12:
                print('\tspectaculars: ' + c.text)
                boxoffice['spectaculars'] = num(c.text)
        save_film(film)
        save_weekend_boxoffice(boxoffice)


# Fetch the page registered under urls['weekends']. The second element of the
# returned pair is bound to err but is not inspected in these statements.
doc, err = get_page(urls['weekends'])
# Every <tr> inside the <tbody> of the table with class "calendar_year".
rows = doc.select('table.calendar_year tbody tr')
for row in rows:
    cells = row.select('td')
    weekend = {}  # record assembled from the cells of this row
    for index, cell in enumerate(cells):
        print(index, cell)  # debug trace of every cell
        if index == 0:
            # First cell: its text is the title and its <a> carries the link.
            print('\ttitle: ', cell.text)
            weekend['title'] = cell.text
            print('\tpage: ', cell.select_one('a')['href'])
            weekend['page'] = cell.select_one('a')['href']
            # The second-to-last '/'-separated piece of the link is parsed
            # with dayfirst=True. NOTE(review): parse is presumably
            # dateutil.parser.parse -- confirm against this file's imports.
            parts = cell.select_one('a')['href'].split('/')
            weekend['weekend'] = parse(parts[-2], dayfirst=True)
        if index == 1:
            # Second cell: only printed here.
            print('\ttotalRur: ' + cell.text)
示例#5
0
from kb import urls
from net import get_page
from store import save_film, save_boxoffice
from utils import num

# Fetch the page registered under urls['year']. The second element of the
# returned pair is bound to err but is not inspected in these statements.
doc, err = get_page(urls['year'])
# Every <tr> of the table with class "calendar_year".
rows = doc.select('table.calendar_year tr')

# The first <tr> is skipped (presumably the header row).
for row in rows[1:]:
    cells = row.select('td')
    film = {}  # film fields collected from this row
    boxoffice = {}  # box-office fields collected from this row
    for index, cell in enumerate(cells):
        print(index, cell)  # debug trace of every cell
        if index == 0:
            # Cell 0: position, stored as the raw cell text.
            print('\tpos: ', cell.text)
            boxoffice['pos'] = cell.text
        if index == 1:
            # Cell 1: title text plus an <a> whose href is the film page and
            # whose name attribute is used as the film id.
            print('\ttitle: ', cell.text)
            film['title'] = cell.text
            print('\tpage: ', cell.select_one('a')['href'])
            film['page'] = cell.select_one('a')['href']
            print('\tname: ', cell.select_one('a')['name'])
            film['id'] = cell.select_one('a')['name']
            boxoffice['film'] = cell.select_one('a')['name']
        if index == 2:
            # Cell 2: stored under film['original'].
            print('\toriginal: ' + cell.text)
            film['original'] = cell.text
        if index == 3:
            # Cell 3: stored under boxoffice['distributor'].
            print('\tdistributor: ' + cell.text)
            boxoffice['distributor'] = cell.text
示例#6
0
import time

from dateutil.parser import parse

from kb import urls, get_thursday
from net import get_page
from store import save_thursday, save_film, save_thursday_boxoffice
from utils import num

# Fetch the page registered under urls['thursdays']. The second element of the
# returned pair is bound to err but is not inspected in these statements.
doc, err = get_page(urls['thursdays'])
# Every <tr> inside the <tbody> of the table with class "calendar_year".
rows = doc.select('table.calendar_year tbody tr')


def parse_thursday(thday):
    print(get_thursday(thday['thursday']))
    time.sleep(4)
    d, e = get_page(get_thursday(thday['thursday']))
    ts = d.select('section.events__table table')
    rs = ts[0].select('tr')
    for r in rs[1:]:
        cs = r.select('td')
        film = {}
        boxoffice = {'thursday': str(thday['thursday'].date())}
        for idx, c in enumerate(cs):
            print(idx, c)
            if idx == 0:
                boxoffice['pos'] = c.text
                print('\tpos: ', boxoffice['pos'])
            if idx == 1:
                film['title'] = c.text
                print('\ttitle: ', film['title'])