Пример #1
0
    titleElement = element.find_element_by_css_selector('a')
    link = titleElement.get_attribute('href')
    title = titleElement.get_attribute('title')
    movie = Movie(title, link)
    movies.append(movie)
for movie in movies:
    driver.get(movie.link)
    print('Processing ', movie.title)
    movie.synopsis = driver.find_element_by_css_selector('div#sinopsis').text
    datos = driver.find_element_by_css_selector(
        'div#tecnicos > p').get_attribute('innerHTML')
    for dato in datos.split('<br>'):
        if 'Género' in dato:
            movie.genre = dato.split(': ')[1]
        if 'Director' in dato:
            movie.director = dato.split(': ')[1]
        if 'Actores' in dato:
            movie.cast = dato.split(': ')[1].split(',')
        if 'Duración' in dato:
            movie.duration = dato.split(': ')[1]
    rooms = driver.find_elements_by_css_selector('.accordion > div.card.panel')
    for r in rooms:
        room = r.find_element_by_css_selector('h2.panel-title')
        types = r.find_elements_by_css_selector(
            '.movie-showtimes-component-combination')
        for t in types:
            type_data = list(
                map(
                    lambda x: x.strip(),
                    t.find_element_by_css_selector(
                        '.movie-showtimes-component-label small').
Пример #2
0
# Select every <li class="ui-slide-item"> element from the parsed page; each
# one is read below as a single movie entry via its data-* attributes.
tags = soup.find_all('li', class_="ui-slide-item")

# Result list (nothing is appended to it within this excerpt).
movies = []

for tag in tags:
    # print("=========================")
    # print(tag)
    # soup.select('li[data-actors]')
    # print(tag.li)

    movie = Movie()
    # Lead actors
    movie.actors = tag.attrs['data-actors']
    # Director
    movie.director = tag.attrs['data-director']
    # Running time
    movie.duration = tag.attrs['data-duration']
    # Douban rating
    movie.rate = tag.attrs["data-rate"]
    # Release region
    movie.region = tag.attrs["data-region"]
    # Release date
    movie.release = tag.attrs["data-release"]
    # Movie title
    movie.title = tag.attrs["data-title"]
    # URL of the related video (trailer)
    movie.trailer = tag.attrs["data-trailer"]
    # Movie cover image, taken from the nested <img> element's src attribute
    movie.cover = tag.img.attrs["src"]
    # Douban URL
    def read_csv_file(self):
        """Load the movie CSV file and populate the reader's collections.

        The raw column values of every row are appended to the per-column
        lists (``self.movies``, ``self.actors``, ``self.genres``, ...).
        Those lists are then used to build the de-duplicated
        ``dataset_of_movies``, ``dataset_of_actors``, ``dataset_of_genres``
        and ``dataset_of_directors`` collections.

        Numeric columns holding the placeholder ``'N/A'`` are skipped, so the
        corresponding movie attribute is left at whatever ``Movie`` sets by
        default.

        Raises:
            KeyError: if a row lacks one of the expected columns.
            ValueError: if ``Year`` or another numeric column holds a value
                that is neither a number nor ``'N/A'``.
        """
        with open(self.__file_name, mode='r', encoding='utf-8-sig') as csvfile:
            for row in csv.DictReader(csvfile):
                self.movies.append(row['Title'])
                self.actors.append(row['Actors'])
                self.genres.append(row['Genre'])
                self.directors.append(row['Director'])
                self.year.append(int(row['Year']))
                self.description.append(row['Description'])
                self.runtime.append(row['Runtime (Minutes)'])
                self.external_ratings.append(row['Rating'])
                self.votes.append(row['Votes'])
                self.metascores.append(row['Metascore'])
                self.revenues.append(row['Revenue (Millions)'])

        for index, raw_title in enumerate(self.movies):
            # NOTE(review): commas inside a title are replaced by spaces here
            # (split on ',' and re-join with ' '); kept as-is because movie
            # equality elsewhere may depend on this exact form.
            title = " ".join(raw_title.split(','))

            movie = Movie(title, self.year[index])
            if movie in self.dataset_of_movies:
                continue

            self.dataset_of_movies.append(movie)
            movie.description = self.description[index]
            movie.director = Director(self.directors[index])
            for genre in self.genres[index].split(','):
                movie.add_genre(Genre(genre))
            for actor in self.actors[index].split(','):
                movie.add_actor(Actor(actor))

            # Each guard must look at the current row's value. Comparing the
            # whole list to 'N/A' is always False and lets int()/float() blow
            # up on the placeholder.
            if self.runtime[index] != 'N/A':
                movie.runtime_minutes = int(self.runtime[index])
            if self.external_ratings[index] != 'N/A':
                movie.external_rating = float(self.external_ratings[index])
            if self.revenues[index] != 'N/A':
                movie.revenue = float(self.revenues[index])
            if self.metascores[index] != 'N/A':
                movie.metascore = int(self.metascores[index])

        def add_unique(raw_values, factory, dataset):
            # Split each comma-separated cell, collapse runs of whitespace in
            # every name, and append the resulting object once per distinct
            # value (as decided by the object's own equality).
            for raw in raw_values:
                for part in raw.split(','):
                    item = factory(" ".join(part.split()))
                    if item not in dataset:
                        dataset.append(item)

        add_unique(self.actors, Actor, self.dataset_of_actors)
        add_unique(self.genres, Genre, self.dataset_of_genres)
        add_unique(self.directors, Director, self.dataset_of_directors)
#
#----------------------------------

from movie import Movie
import fresh_tomatoes

# Movies
# Each entry is created with four positional arguments (a title, a one-line
# summary, a local poster image path and a YouTube trailer URL); further
# details are attached afterwards as plain attributes.
# NOTE(review): the parameter names of Movie.__init__ are not visible here;
# confirm the argument order against movie.py.

# Arrival
arrival = Movie("Arrival",
                "A linguist is recruited by the military to assist in translating alien communications.",
                "./images/arrival.jpg",
                "https://www.youtube.com/watch?v=ZLO4X6UI8OY")

# Long-form plot description.
arrival.storyline = "When mysterious spacecraft touch down across the globe, an elite team - led by expert linguist Louise Banks - is brought together to investigate. As mankind teeters on the verge of global war, Banks and the team race against time for answers - and to find them, she will take a chance that could threaten her life, and quite possibly humanity."

# Credits and catalogue details, all stored as display-ready strings.
arrival.director = "Denis Villeneuve"
arrival.writers = "Eric Heisserer (screenplay), Ted Chiang (based on the story \"Story of Your Life\" written by)"
arrival.stars = "Amy Adams, Jeremy Renner, Forest Whitaker"
arrival.taglines = "Why are they here?"
arrival.genres = "Drama | Mystery | Sci-Fi | Thriller"
arrival.country = "USA"
arrival.language = "English"
arrival.release_date = "11 November 2016 (Norway)"
arrival.runtime = "116 min"

# Kong: Skull Island
kong_skull_island = Movie("Kong: Skull Island",
                          "An action/adventure story centered on King Kong's origins.",
                          "./images/KongSkullIsland.jpg",
                          "https://www.youtube.com/watch?v=h9y6oPka3us")
Пример #5
0
# Record keys whose values are lists of plain strings, paired with the Movie
# attribute that receives the '$'-joined result.
_TEXT_LIST_FIELDS = (
    ('genres', 'genres'),
    ('color info', 'color_info'),
    ('countries', 'countries'),
    ('languages', 'languages'),
    ('sound mix', 'sound_mix'),
)

# Record keys whose values are lists of entries indexed by 'name', paired with
# the Movie attribute that receives the '$'-joined names.
_NAMED_LIST_FIELDS = (
    ('director', 'director'),
    ('writer', 'writer'),
    ('editor', 'editor'),
    ('cinematographer', 'cinematographer'),
    ('art direction', 'art_director'),
    ('costume designer', 'costume_designer'),
    ('original music', 'original_music'),
    ('production companies', 'production_companies'),
)

# Movie attributes filled from the first, second and third cast entries.
_CAST_ATTRS = ('cast_1st', 'cast_2nd', 'cast_3rd')


def _fetch_movie_features(mvID):
    """Fetch the record for *mvID* and copy its features onto a new Movie.

    List-valued fields are flattened into a single '$'-separated string and
    are only set when the key is present in the record. ``year`` and
    ``runtimes`` fall back to 0 when missing (or, for runtimes, unparsable).
    Any exception raised while fetching or converting propagates to the
    caller.
    """
    # Look the movie up by id through the module-level imdb_access object.
    mvIN = imdb_access.get_movie(mvID)
    mvOJ = Movie()
    mvOJ.id = mvID
    mvOJ.title = mvIN.get('title')
    # Poster URL and its full-size variant.
    mvOJ.cover_url = mvIN.get('cover url')
    mvOJ.giant_cover_url = mvIN.get('full-size cover url')

    for key, attr in _TEXT_LIST_FIELDS:
        if key in mvIN:
            setattr(mvOJ, attr, '$'.join(mvIN.get(key)))
    for key, attr in _NAMED_LIST_FIELDS:
        if key in mvIN:
            setattr(mvOJ, attr,
                    '$'.join(entry['name'] for entry in mvIN.get(key)))

    # Up to three leading cast members. A record without any cast simply
    # leaves the attributes untouched instead of failing the whole movie.
    cast = mvIN.get('cast') or []
    for attr, member in zip(_CAST_ATTRS, cast):
        setattr(mvOJ, attr, member['name'])

    # year int
    mvOJ.year = mvIN.get('year') if 'year' in mvIN else 0

    # running time int
    if 'runtimes' in mvIN:
        try:
            first_runtime = mvIN.get('runtimes')[0]
            as_text = str(first_runtime)
            if ':' in as_text:
                # The number is taken from the part after the first ':'.
                mvOJ.runtimes = int(as_text.split(':')[1])
            else:
                mvOJ.runtimes = int(first_runtime)
        except Exception:
            # Best effort: an unparsable runtime is recorded as 0.
            mvOJ.runtimes = 0
    else:
        mvOJ.runtimes = 0

    # budget int
    # if 'budget' in mvIN:
    #     mvOJ.budget = mvIN.get('budget')

    # get rating for old movies
    if mode == "old":
        mvOJ.number_of_votes = get_rating(mvID)
    return mvOJ


def get_info():
    """Consume movie ids from ``mvIDQ`` and publish the results on ``mvINQ``.

    After an initial 5 second delay the loop keeps running while ``mvIDQ`` is
    non-empty or the module-level ``stage`` equals 0. For each id a Movie
    object is built and put on ``mvINQ``; if building fails, the error is
    printed and the bare id is put on ``mvINQ`` instead. The loop sleeps one
    second between ids.

    Every fetched id is acknowledged with ``mvIDQ.task_done()``, on failure as
    well as on success, so a ``mvIDQ.join()`` elsewhere cannot hang because of
    a failed movie.
    """
    global stage
    time.sleep(5)
    print("-CRAWLER- Start to get movie feature...")
    while (not mvIDQ.empty()) or stage == 0:
        # NOTE(review): this get() blocks without a timeout, so the worker
        # waits here indefinitely if the queue stays empty - confirm the
        # producer/shutdown protocol covers that case.
        mvID = mvIDQ.get()
        try:
            mvINQ.put(_fetch_movie_features(mvID))
        # TODO cannot handle exception
        except Exception as e:
            print('-CRAWLER- An {} exception occured! {}'.format(e, mvID))
            mvINQ.put(mvID)
        finally:
            # Exactly one task_done() per successful get().
            mvIDQ.task_done()
        time.sleep(1)