def parse_thursday(thday):
    """Fetch the box-office page for one Thursday and save every data row.

    The page address is built with ``get_thursday(thday['thursday'])`` and
    downloaded through ``get_page`` after a four-second pause. Rows are read
    from the first table matched by ``section.events__table table``; the
    first row is skipped (presumably a header row -- confirm against the
    page markup). Cells are interpreted by position:

    * 0 -- ``boxoffice['pos']``
    * 1 -- ``film['title']``, ``film['page']`` (link ``href``) and
      ``film['id']`` / ``boxoffice['film']`` (first ``rel`` value of the link)
    * 2 -- ``boxoffice['distributor']``
    * 3 -- ``boxoffice['thursday_rur']`` (cell text passed through ``num``)

    After each row the collected ``film`` dict is handed to ``save_film`` and
    the ``boxoffice`` dict to ``save_thursday_boxoffice``.

    Args:
        thday: Mapping whose ``'thursday'`` value exposes ``.date()``
            (presumably a ``datetime`` -- verify against the caller).

    Raises:
        IndexError: If no table matches the selector on the fetched page.
        TypeError: If the title cell contains no ``<a>`` element.
    """
    url = get_thursday(thday['thursday'])
    print(url)
    # Fixed pause before every request.
    time.sleep(4)
    # NOTE(review): the second value returned by get_page is never inspected;
    # confirm what it carries on failure.
    d, _err = get_page(url)
    tables = d.select('section.events__table table')
    rows = tables[0].select('tr')
    for row in rows[1:]:
        film = {}
        boxoffice = {'thursday': str(thday['thursday'].date())}
        for idx, cell in enumerate(row.select('td')):
            print(idx, cell)
            if idx == 0:
                boxoffice['pos'] = cell.text
                print('\tpos: ', boxoffice['pos'])
            elif idx == 1:
                film['title'] = cell.text
                print('\ttitle: ', film['title'])
                # Look the anchor up once; it supplies both the page link and
                # the film identifier.
                link = cell.select_one('a')
                film['page'] = link['href']
                print('\tpage: ', film['page'])
                film['id'] = link['rel'][0]
                boxoffice['film'] = film['id']
                print('\tname: ', film['id'])
            elif idx == 2:
                boxoffice['distributor'] = cell.text
                print('\tdistributor: ' + boxoffice['distributor'])
            elif idx == 3:
                boxoffice['thursday_rur'] = num(cell.text)
                # str() keeps the concatenation valid even when num() yields
                # a non-string value; output is unchanged if it is a string.
                print('\tthursdayRur: ' + str(boxoffice['thursday_rur']))
        save_film(film)
        save_thursday_boxoffice(boxoffice)
def page(page):
    """Collect the actor names listed on a movie page.

    Waits four seconds, downloads the address produced by ``get_movie(page)``
    via ``get_page``, and reads every ``span[itemprop=actor]`` element. Each
    stripped name is stored through ``save_person(Person(-1, name))`` and
    also appended to the returned list.

    Args:
        page: Value forwarded unchanged to ``get_movie``.

    Returns:
        list: The stripped actor names, in document order.
    """
    sleep(4)
    # The second value returned by get_page is not used here.
    doc, _ = get_page(get_movie(page))
    names = []
    for node in doc.select('span[itemprop=actor]'):
        name = node.text.strip()
        save_person(Person(-1, name))
        names.append(name)
    return names
def parse_weekend(week):
    """Fetch the box-office page for one weekend and save every data row.

    The address comes from ``get_weekend(week['weekend'])`` and is downloaded
    with ``get_page`` after a four-second pause. Rows are taken from
    ``table#krestable tr``; the first row is skipped (presumably a header --
    confirm against the page markup). Cells are read by position:

    * 1 -- ``boxoffice['pos']``
    * 3 -- ``film['title']``, ``film['page']`` (link ``href``) and
      ``film['id']`` / ``boxoffice['film']`` (first ``rel`` value of the link)
    * 4 -- ``film['original']``
    * 5 -- ``boxoffice['distributor']``
    * 6, 8, 10, 11, 12 -- ``weekend_rur``, ``screens``, ``days``,
      ``total_rur`` and ``spectaculars`` in ``boxoffice``, each passed
      through ``num``

    After each row ``film`` goes to ``save_film`` and ``boxoffice`` to
    ``save_weekend_boxoffice``.

    Args:
        week: Mapping whose ``'weekend'`` value exposes ``.date()``
            (presumably a ``datetime`` -- verify against the caller).
    """
    # Column index -> (debug label, boxoffice key) for cells run through num().
    numeric_columns = {
        6: ('\tweekendRur: ', 'weekend_rur'),
        8: ('\tscreens: ', 'screens'),
        10: ('\tdays: ', 'days'),
        11: ('\ttotalRur: ', 'total_rur'),
        12: ('\tspectaculars: ', 'spectaculars'),
    }

    print(get_weekend(week['weekend']))
    time.sleep(4)
    # The second value returned by get_page is not used here.
    document, _ = get_page(get_weekend(week['weekend']))
    table_rows = document.select('table#krestable tr')
    for table_row in table_rows[1:]:
        cells = table_row.select('td')
        film = {}
        boxoffice = {'weekend': str(week['weekend'].date())}
        for column, cell in enumerate(cells):
            print(column, cell)
            if column == 1:
                boxoffice['pos'] = cell.text
                print('\tpos: ', boxoffice['pos'])
            elif column == 3:
                title = cell.text
                print('\ttitle: ', title)
                film['title'] = title
                # The anchor carries both the film page link and its id.
                link = cell.select_one('a')
                href = link['href']
                print('\tpage: ', href)
                film['page'] = href
                rel = link['rel']
                print('\tname: ', rel)
                film['id'] = rel[0]
                boxoffice['film'] = rel[0]
            elif column == 4:
                original = cell.text
                print('\toriginal: ' + original)
                film['original'] = original
            elif column == 5:
                distributor = cell.text
                print('\tdistributor: ' + distributor)
                boxoffice['distributor'] = distributor
            elif column in numeric_columns:
                label, key = numeric_columns[column]
                raw = cell.text
                print(label + raw)
                boxoffice[key] = num(raw)
        save_film(film)
        save_weekend_boxoffice(boxoffice)
print('\tscreens: ' + c.text) boxoffice['screens'] = num(c.text) if idx == 10: print('\tdays: ' + c.text) boxoffice['days'] = num(c.text) if idx == 11: print('\ttotalRur: ' + c.text) boxoffice['total_rur'] = num(c.text) if idx == 12: print('\tspectaculars: ' + c.text) boxoffice['spectaculars'] = num(c.text) save_film(film) save_weekend_boxoffice(boxoffice) doc, err = get_page(urls['weekends']) rows = doc.select('table.calendar_year tbody tr') for row in rows: cells = row.select('td') weekend = {} for index, cell in enumerate(cells): print(index, cell) if index == 0: print('\ttitle: ', cell.text) weekend['title'] = cell.text print('\tpage: ', cell.select_one('a')['href']) weekend['page'] = cell.select_one('a')['href'] parts = cell.select_one('a')['href'].split('/') weekend['weekend'] = parse(parts[-2], dayfirst=True) if index == 1: print('\ttotalRur: ' + cell.text)
from kb import urls
from net import get_page
from store import save_film, save_boxoffice
from utils import num

# Top-level script: download the page registered under urls['year'] and walk
# the rows of its 'table.calendar_year' table, collecting per-row film and
# box-office fields by cell position.
# NOTE(review): `err` is never inspected in the visible code -- confirm what
# get_page reports on failure.
doc, err = get_page(urls['year'])
rows = doc.select('table.calendar_year tr')
# The first row is skipped -- presumably the table header; verify against
# the page markup.
for row in rows[1:]:
    cells = row.select('td')
    # Fresh accumulators for every table row.
    film = {}
    boxoffice = {}
    for index, cell in enumerate(cells):
        # Debug trace of every cell as it is visited.
        print(index, cell)
        if index == 0:
            # Cell 0: position value, stored as raw cell text.
            print('\tpos: ', cell.text)
            boxoffice['pos'] = cell.text
        if index == 1:
            # Cell 1: title text plus the link's href and `name` attribute;
            # the `name` attribute is used as the film identifier.
            print('\ttitle: ', cell.text)
            film['title'] = cell.text
            print('\tpage: ', cell.select_one('a')['href'])
            film['page'] = cell.select_one('a')['href']
            print('\tname: ', cell.select_one('a')['name'])
            film['id'] = cell.select_one('a')['name']
            boxoffice['film'] = cell.select_one('a')['name']
        if index == 2:
            # Cell 2: stored under film['original'].
            print('\toriginal: ' + cell.text)
            film['original'] = cell.text
        if index == 3:
            # Cell 3: stored under boxoffice['distributor'].
            print('\tdistributor: ' + cell.text)
            boxoffice['distributor'] = cell.text
import time from dateutil.parser import parse from kb import urls, get_thursday from net import get_page from store import save_thursday, save_film, save_thursday_boxoffice from utils import num doc, err = get_page(urls['thursdays']) rows = doc.select('table.calendar_year tbody tr') def parse_thursday(thday): print(get_thursday(thday['thursday'])) time.sleep(4) d, e = get_page(get_thursday(thday['thursday'])) ts = d.select('section.events__table table') rs = ts[0].select('tr') for r in rs[1:]: cs = r.select('td') film = {} boxoffice = {'thursday': str(thday['thursday'].date())} for idx, c in enumerate(cs): print(idx, c) if idx == 0: boxoffice['pos'] = c.text print('\tpos: ', boxoffice['pos']) if idx == 1: film['title'] = c.text print('\ttitle: ', film['title'])