示例#1
0
def get_movies_somerville(theater, date):
    """Get movie names and times from Somerville Theater's website

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://somervilletheatre.com/wp-content/themes/somerville/showtimes.xml'

    soup = soup_me(BASE_URL)

    movies = soup('filmtitle')

    movie_names = [m.shortname.text
                   for m in movies]  # /or/ m.find('name').text

    # the feed's <date> tags are compared as mmddyyyy strings,
    # so convert the requested date once rather than once per showing
    target_mmddyyyy = convert_date(date, fmt_out='%m%d%Y')

    def to_yyyymmdd(mmddyyyy):
        return mmddyyyy[-4:] + mmddyyyy[:-4]  # mmddyyyy -> yyyymmdd

    def fmt_showing(d, t):
        parsed = dparser.parse(' '.join((to_yyyymmdd(d.text),
                                         t.text)))  # yyyymmdd hhmm ->
        # NOTE: %l and %P are platform-specific strftime directives
        return parsed.strftime(
            '%Y-%m-%d @ %l:%M%P')  # yyyy-mm-dd @ hh:mm {a,p}m

    movie_datetimes = [
        [
            fmt_showing(d, t)
            for d, t in zip(m('date'), m('time'))
            if d.text == target_mmddyyyy
        ] for m in movies
    ]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
示例#2
0
def get_movies_landmark(theater, date):
    """Scrape Kendall Landmark's showtimes feed for movie names and times

    :theater: str
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://movie-lmt.peachdigital.com/movies/GetFilmsByCinema/21/151'

    listings = json_me(BASE_URL)['Result']

    movie_names = [listing['Title'] for listing in listings]

    movie_datetimes = []
    for listing in listings:
        # one (possibly empty) list of showings per session,
        # keeping only sessions whose display date is the requested one
        per_session = []
        for session in listing['Sessions']:
            per_session.append([
                '{} @ {}'.format(date, showing['StartTime'])
                for showing in session['Times']
                if convert_date(session['DisplayDate']) == date
            ])
        movie_datetimes.append(flatten(per_session))

    upcoming = filter_past(movie_datetimes)
    names, times = combine_times(*filter_movies(movie_names, upcoming))

    return names, times
示例#3
0
def get_movies_ifc(theater, date):
    """Get movie names and times from IFC's website

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the page does not list exactly one schedule
              for `date`
    """
    BASE_URL = 'http://www.ifccenter.com/'

    soup = soup_me(BASE_URL)

    matching_days = [
        day for day in soup('div', class_=re.compile('^daily-schedule'))
        if day.h3.text != 'Coming Soon' and convert_date(day.h3.text) == date
    ]
    # previously an unguarded `day, = [...]` raised ValueError here;
    # the length check keeps any error raised by convert_date visible
    if len(matching_days) != 1:  # e.g. no matching date listed yet
        return [], []
    day, = matching_days

    movie_divs = day('div')

    movie_names = [mdiv.h3.text for mdiv in movie_divs]
    movie_datetimes = [[
        '{} @ {}'.format(date, li.text) for li in mdiv('li')
    ] for mdiv in movie_divs]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
示例#4
0
def get_movies_quad(theater, date):
    """Get movie names and times from Quad's website

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when the page does not list exactly one block
              for `date`
    """
    BASE_URL = 'https://quadcinema.com/all/'

    soup = soup_me(BASE_URL)

    matching_days = [
        d for d in soup('div', class_='now-single-day')
        if convert_date(d.h1.text) == date
    ]
    # previously an unguarded `day, = [...]` raised ValueError here;
    # the length check keeps any error raised by convert_date visible
    if len(matching_days) != 1:  # e.g. no matching date listed yet
        return [], []
    day, = matching_days

    movie_names = [movie.text for movie in day('h4')]
    movie_datetimes = [[
        '{} @ {}'.format(date, li.text.replace('.', ':'))  # '.' -> ':'
        for li in movie('li')
    ] for movie in day('div', class_='single-listing')]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
示例#5
0
def get_movies_mfa(theater, date):
    """Get movie names and times from Museum of Fine Arts' website

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://www.mfa.org/programs/film'

    PARAMS = {'field_date_value_1': date}

    soup = soup_me(BASE_URL, PARAMS)

    relevant_movies = [
        div for div in soup('div', class_='col-sm-8')
        if div.span and convert_date(div.span.contents[0]) == date
    ]
    movie_names = [m.a.text for m in relevant_movies]

    def convert(contentlst):
        # contents unpack as (date, <ignored middle element>, time string)
        day, _, timestr = contentlst
        # keep only the start of a 'start–end' range; a listing without
        # an end time (no en dash) is passed through whole instead of
        # failing to unpack
        start = timestr.split('–', 1)[0]
        return DATETIME_SEP.join((convert_date(day), start))

    movie_datetimes = [convert(m.span.contents) for m in relevant_movies]

    movie_times = filter_past(movie_datetimes)
    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
示例#6
0
def get_movies_moma(theater, date):
    """Scrape the Museum of Modern Art's calendar for film names and times

    :theater: str
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://www.moma.org/calendar/?utf8=%E2%9C%93&happening_filter=Films&date={}&location=both'

    soup = soup_me(BASE_URL.format(date))

    def when_text(tile):
        # raw text of the tile's date/time div
        return tile.find('div', class_='center balance-text').text

    relevant_movies = []
    for tile in soup('div', class_='calendar-tile calendar-tile--tall-image'):
        month_day = (
            when_text(tile).replace(u'\xa0', ' ')  # non-breaking space -> " "
            .split(', ')[1])  # extract month & day from full datetime
        if date == convert_date(month_day):
            relevant_movies.append(tile)

    # list per showing.. some have multiple films
    nested_movie_names = []
    for tile in relevant_movies:
        emphasized = tile.h3('em')
        if emphasized:
            nested_movie_names.append([em.text for em in emphasized])
        else:
            nested_movie_names.append([tile.h3.text])

    # main attraction is the last film
    movie_names = [titles[-1] for titles in nested_movie_names]

    # any earlier films on the bill become a "+ a,b" annotation
    movie_formats = []
    for titles in nested_movie_names:
        if len(titles) > 1:
            movie_formats.append('+ {}'.format(','.join(titles[:-1])))
        else:
            movie_formats.append('')

    PATTERN = re.compile('–[0-9]*:?[0-9]*')
    fmt_out = DATETIME_SEP.join(('%Y-%m-%d',
                                 '%l:%M%P'))  # yyyy-mm-dd @ hh:mm {a,p}m
    movie_datetimes = [
        dparser.parse(PATTERN.sub('', when_text(tile)))  # remove any time ranges
        .strftime(fmt_out)
        for tile in relevant_movies
    ]
    movie_times = filter_past(movie_datetimes)

    # annotate with format
    annotated = []
    for times, fmt in zip(movie_times, movie_formats):
        if times and fmt:
            times = times + [f'[ {fmt} ]']
        annotated.append(times)

    movie_names, movie_times = filter_movies(movie_names, annotated)

    return movie_names, movie_times
示例#7
0
def get_movies_showtimes(theater, date):
    """Scrape a theater's page on Showtimes for movie names and times

    :theater: str
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://www.showtimes.com/movie-theaters/{}'

    # known theaters are phrased as functions too, so that the fallback
    # theater-page scraper only runs when it is actually needed
    D_THEATERS = {
        'regal fenway': lambda *args: 'regal-fenway-stadium-13-rpx-6269',
        'ua court st': lambda *args: 'ua-court-street-stadium-12-rpx-6608'
    }

    try:
        slug_for = D_THEATERS.get(theater.lower(),
                                  get_theaterpg_showtimes)  # fallback for unlisted theater
        page = soup_me(BASE_URL.format(slug_for(theater)))
        movies = page('li', class_='movie-info-box')
    except Exception as err:
        print(error_str.format(err))  # error msg only
        movies = []  # no matching theater

    movie_names = []
    for m in movies:
        headings = m('h2', class_='media-heading')
        # keep only the first line of each heading
        movie_names.append(''.join(
            re.sub('[\r\n].*', '', heading.text.strip())
            for heading in headings))

    def starts_day(txt):
        # the first button of each day's group is the one containing a comma
        return ',' in txt

    # [[day, time, time, day, time], ..] -> [[[day, time, time], [day, time]], ..]
    nested_buttons = []
    for m in movies:
        labels = (button.text for button in m('button', type='button'))
        nested_buttons.append(list(split_before(labels, starts_day)))

    movie_datetimes = []
    for buttons in nested_buttons:
        on_date = []
        for day, *times in buttons:
            day = day.replace(':', '')
            if convert_date(day) == date:
                on_date.append(['{} @ {}'.format(day, t) for t in times])
        movie_datetimes.append(flatten(on_date))

    upcoming = filter_past(movie_datetimes)
    names, times = combine_times(*filter_movies(movie_names, upcoming))

    return names, times
示例#8
0
def get_movies_somerville(theater, date):
    """Get movie names and times from Somerville Theater's website

    A trailing film format (35mm / 70mm) is stripped from each movie name
    and appended to that movie's times as a `[ fmt ]` annotation instead.

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'https://somervilletheatre.com/wp-content/themes/somerville/showtimes.xml'

    soup = soup_me(BASE_URL)

    movies = soup('filmtitle')

    PATTERN = re.compile(' ((35|70)mm)$', re.I)

    def extract_fmt(name):
        # re.split also returns the captured groups, so keep
        # only name and (35|70)mm, if any
        name, *fmt = re.split(PATTERN, name)[:2]
        return name, ''.join(fmt).lower()  # (cleaned) movie name, movie fmt

    # built as two lists rather than `zip(*...)`, which cannot be unpacked
    # into two names when the feed contains no movies
    extracted = [extract_fmt(m.shortname.text)  # /or/ m.find('name').text
                 for m in movies]
    movie_names = [name for name, _ in extracted]
    movie_formats = [fmt for _, fmt in extracted]

    # the feed's <date> tags are compared as mmddyyyy strings,
    # so convert the requested date once rather than once per showing
    target_mmddyyyy = convert_date(date, fmt_out='%m%d%Y')

    # NOTE: %l and %P are platform-specific strftime directives
    fmt_out = DATETIME_SEP.join(('%Y-%m-%d',
                                 '%l:%M%P'))  # yyyy-mm-dd @ hh:mm {a,p}m

    def to_yyyymmdd(mmddyyyy):
        return mmddyyyy[-4:] + mmddyyyy[:-4]  # mmddyyyy -> yyyymmdd

    def fmt_showing(d, t):
        parsed = dparser.parse(' '.join((to_yyyymmdd(d.text),
                                         t.text)))  # yyyymmdd hhmm
        return parsed.strftime(fmt_out)

    movie_datetimes = [
        [
            fmt_showing(d, t)
            for d, t in zip(m('date'), m('time'))
            if d.text == target_mmddyyyy
        ] for m in movies
    ]
    movie_times = filter_past(movie_datetimes)

    # annotate with formats
    movie_times = [(times if not times or not fmt else times + [f'[ {fmt} ]'])
                   for times, fmt in zip(movie_times, movie_formats)]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
示例#9
0
def get_movies_pghfilmmakers(theater, date):
    """Scrape Pittsburgh Filmmakers' showtimes page for one of its theaters

    :theater: str (one of the keys of D_THEATERS, case-insensitive)
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times)
    """
    BASE_URL = 'http://cinema.pfpca.org/films/showtimes?location={}'

    D_THEATERS = {
        'regent square theater': 24,
        'harris theater': 20,
        'melwood screening room': 18
    }

    location_id = D_THEATERS[theater.lower()]
    soup = soup_me(BASE_URL.format(location_id))

    # find the caption heading the requested day's block
    try:
        captions_for_date = []
        for caption in soup('caption'):
            if caption.text == convert_date(date, fmt_out='%a, %b %-d'):
                captions_for_date.append(caption)
        block, = captions_for_date
    except ValueError:  # unpacking failed, e.g. no caption matched
        return [], []

    # the listings live three nodes after the caption
    listings = block.next.next.next

    movie_names = []
    for link in listings('a', href=re.compile('/films/*')):
        movie_names.append(link.text)

    movie_datetimes = []
    for cell in listings('td',
                         class_='views-field views-field-field-location'):
        showtime = cell.next.next.next.text.strip()
        movie_datetimes.append(' @ '.join((date, showtime)))

    upcoming = filter_past(movie_datetimes)

    # drop movies with no future times, then merge times of the same movie
    names, times = combine_times(*filter_movies(movie_names, upcoming))

    return names, times
示例#10
0
def get_movies_quad(theater, date):
    """Get movie names and times from Quad's website

    Non-showtime `li`s of a listing are collected as that movie's formats
    and appended to its times as a `[ fmt,fmt ]` annotation.

    :theater: str (not used by this scraper)
    :date: str (yyyy-mm-dd) (default: today)
    :returns: (list of movie names, list of lists of movie times);
              ([], []) when no single block for `date` is found
    """
    BASE_URL = 'https://quadcinema.com/all/'

    soup = soup_me(BASE_URL)

    try:
        day, = [
            d for d in soup('div', class_='now-single-day')
            if convert_date(d.h1.text) == date
        ]
    except ValueError:  # no matching date listed yet
        return [], []

    movie_names = [movie.text for movie in day('h4')]

    movies = day('div', class_='single-listing')

    PATTERN = re.compile('^time')
    movie_datetimes = [[
        DATETIME_SEP.join((date, time.text.replace('.', ':')))
        for time in m('li', class_=PATTERN)
    ] for m in movies]
    movie_times = filter_past(movie_datetimes)

    # non-showtime `li`s: any non-empty class that does not start with
    # "time". The former '^[^(time)]' was a character class, so it rejected
    # every class beginning with one of the characters ( t i m e )
    ANTIPATTERN = re.compile('^(?!time).')
    movie_formats = [[fmt.text for fmt in m('li', class_=ANTIPATTERN)]
                     for m in movies]

    # annotate with formats
    movie_times = [(times if not times or not fmt else times +
                    ['[ {} ]'.format(','.join(fmt))])
                   for times, fmt in zip(movie_times, movie_formats)]

    movie_names, movie_times = combine_times(
        *filter_movies(movie_names, movie_times))

    return movie_names, movie_times
示例#11
0
        try:
            city = maybe_city
            theaters = get_theaters(city)
            date = maybe_date
        except (FileNotFoundError, AssertionError):  # date rather than city
            try:
                city = maybe_date  # could be None..
                theaters = get_theaters(city)
            except (FileNotFoundError,
                    AssertionError):  # date rather than city
                city = CITY
                theaters = get_theaters(city)
            date = maybe_city if maybe_city is not None else DATE

        moviegetter = partial(get_movies, date=convert_date(date))

    # do stuff
    need_ratings = args.filter_by > 0 or not args.simple
    if need_ratings:
        d_cached = {}

        try:
            from ratings import get_ratings
        except (Exception) as e:  # e.g. missing secrets
            msg, = e.args
            print(msg + '\n\n')

            need_ratings = False

    for theater in theaters:
示例#12
0
def get_movies_google(theater, date, *args, **kwargs):
    """Get movie names and times from Google search

    :theater: str
    :date: str (yyyy-mm-dd) (default: today)
    :args, kwargs: other search terms, e.g. zip code (currently unused)
    :returns: (list of movie names, list of lists of movie times),
              with one entry per (movie, format) pair
    :raises: NoMoviesException if the results contain no showtimes block,
             or its date does not match `date`
    """
    # search by weekday name, or by 'today' when the weekday matches today's
    fdate = convert_date(date, fmt_out='%A')  # formatted for search
    fdate = fdate if fdate != convert_date('today',
                                           fmt_out='%A') else 'today'

    BASE_URL = 'https://www.google.com/search'

    PARAMS = {
        'q': safe_encode('showtimes', '"{}"'.format(theater), fdate),
        'ie': 'utf-8',
        'client': 'firefox-b-1-e'
    }

    # the query is composed into the URL by hand, since passing params
    # directly to requests gives problems with extraneous % encoding
    soup = soup_me(compose_query(BASE_URL, PARAMS))

    # TODO google static html only returns up to 10 movies..

    # NOTE(review): as written the alternation binds as `^(std-ts)` OR
    # `(lr_c_stnl)$`; confirm whether `^(std-ts|lr_c_stnl)$` was intended
    CLASS = AttrDict(timelist='lr_c_fcc',
                     time=re.compile('^(std-ts)|(lr_c_stnl)$'),
                     fmt='lr_c_vn')

    try:
        relevant_div = soup.find('div', {'data-date': True})

        # check date; raised explicitly rather than via `assert`, which is
        # stripped under `python -O` and would let a wrong date through
        date_found = relevant_div.attrs['data-date']
        if convert_date(date_found) != date:
            raise AssertionError('{} != {}'.format(date_found, date))

        movies = relevant_div('div', {'data-movie-name': True})

    except (AssertionError, AttributeError) as e:
        # AttributeError: no div found, so `relevant_div` is None
        print(error_str.format('No matching theater on google'))
        raise NoMoviesException(e) from e

    movie_names = [m.span.text for m in movies]

    movie_times = [  # nested times per format per movie
        [[time.text for time in timelst('div', class_=CLASS.time)]
         for timelst in m('div', class_=CLASS.timelist)] for m in movies
    ]

    movie_formats = [
        [
            getattr(timelst.find('div', class_=CLASS.fmt), 'text',
                    None)  # default if no format listed
            for timelst in m('div', class_=CLASS.timelist)
        ] for m in movies
    ]

    # flatten timelists for movies with multiple formats:
    # repeat each name once per timelist so names stay aligned with times
    n_timelists_per_movie = [len(timelsts) for timelsts in movie_times]
    movie_names = list(
        chain.from_iterable(
            [name] * n for name, n in zip(movie_names, n_timelists_per_movie)))

    # annotate with format
    movie_times = [
        (times if fmt == 'Standard' or not times or not fmt else times +
         ['[ {} ]'.format(fmt)])
        for times, fmt in zip(flatten(movie_times), flatten(movie_formats))
    ]

    # no need to filter - tags only correspond to upcoming movie times
    return movie_names, movie_times
示例#13
0
 def convert(contentlst):
     """Build a date/time string from a 3-element contents list

     :contentlst: (date, <ignored middle element>, time string)
     :returns: str, converted date and start time joined by DATETIME_SEP
     """
     day, _, timestr = contentlst
     # keep only the start of a 'start–end' range; a time string without
     # an en dash is passed through whole instead of failing to unpack
     start = timestr.split('–', 1)[0]
     return DATETIME_SEP.join((convert_date(day), start))