Python SOUP示例，bs4.SOUP Python示例

示例#1

0

显示文件

文件： movie_recommend2.py 项目： Vish1811/super-scripts

def f1():
    """Print movie titles scraped from IMDb's drama listing (the "Sad" mood).

    Only the first 13 matching anchors are examined. An anchor's text is
    printed when its HTML splits into exactly three pieces on ``>``, i.e.
    when the tag wraps plain text only. Two blank lines are printed at the
    end. Nothing is returned.
    """
    emotion = "Sad"
    print("SAD MOVIES HAIN")

    page_text = HTTP.get(
        'http://www.imdb.com/search/title?genres=drama&title_type=feature&sort=moviemeter, asc'
    ).text
    parsed = SOUP(page_text, "lxml")
    anchors = parsed.find_all(
        "a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

    for position, anchor in enumerate(anchors):
        pieces = str(anchor).split('>')
        if len(pieces) == 3:
            # pieces[1] is "<text></a"; dropping the last three characters
            # leaves the anchor text.
            print(pieces[1][:-3])
        if position > 11:
            break

    print()
    print()

示例#2

0

显示文件

def main(emotion):
    """Fetch the IMDb listing mapped to a numeric emotion code.

    Args:
        emotion: Integer code selecting the listing to download. The genre
            requested for each code is: 1 -> family, 2 -> drama,
            3 -> sport, 4 -> musical, 5 -> "happy", 6 -> comedy.

    Returns:
        The ``find_all`` result holding every ``<a>`` element of the page
        whose ``href`` matches the IMDb title-link pattern.

    Raises:
        ValueError: If ``emotion`` is not one of the supported codes.
            (Previously this surfaced as an UnboundLocalError.)
    """
    urls_by_code = {
        1: 'http://www.imdb.com/search/title?genres=family&title_type=feature&sort=moviemeter, asc',
        2: 'http://www.imdb.com/search/title?genres=drama&title_type=feature&sort=moviemeter, asc',
        3: 'http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc',
        4: 'http://www.imdb.com/search/title?genres=musical&title_type=feature&sort=moviemeter, asc',
        # NOTE(review): "happy" does not look like an IMDb genre name --
        # confirm which genre code 5 is meant to request.
        5: 'https://www.imdb.com/search/title/?genres=happy&title_type=feature&sort=moviemeter, asc',
        6: 'https://www.imdb.com/search/title/?genres=comedy&title_type=feature&sort=moviemeter, asc',
    }

    try:
        urlhere = urls_by_code[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments; report them like any other
        # unsupported value instead of failing later on an unbound name.
        raise ValueError("unsupported emotion code: %r" % (emotion,)) from None

    # Download the whole page and parse it.
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Title links look like /title/tt<digits>/.
    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

示例#3

0

显示文件

文件： Movie_Recommendation_Script.py 项目： itsjatin135s/awesomeScripts

def main(emotion):
    """Return IMDb title links for the genre paired with ``emotion``.

    Args:
        emotion: Lower-case emotion name. Supported values and the genre
            requested for each: "sad" -> drama, "disgust" -> musical,
            "anger" -> family, "anticipation" -> thriller, "fear" -> sport,
            "enjoyment" -> thriller, "trust" -> western,
            "surprise" -> film_noir.

    Returns:
        The ``find_all`` result holding every ``<a>`` element of the
        listing page whose ``href`` matches the IMDb title-link pattern.

    Raises:
        ValueError: If ``emotion`` is not a supported name. (Previously
            this surfaced as an UnboundLocalError.)
    """
    genre_by_emotion = {
        "sad": "drama",
        "disgust": "musical",
        "anger": "family",
        "anticipation": "thriller",
        "fear": "sport",
        "enjoyment": "thriller",
        "trust": "western",
        "surprise": "film_noir",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments; treat them as unsupported.
        raise ValueError("unsupported emotion: %r" % (emotion,)) from None

    urlhere = ('http://www.imdb.com/search/title?genres=' + genre +
               '&title_type=feature&sort=moviemeter, asc')

    # Download the whole page and parse it.
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Title links look like /title/tt<digits>/.
    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

示例#4

0

显示文件

def main(emotion):
    """Return IMDb title links for the genre paired with ``emotion``.

    Args:
        emotion: Capitalised emotion name. Supported values and the genre
            requested for each: "Sad" -> drama, "Disgust" -> musical,
            "Anger" -> family, "Anticipation" -> thriller,
            "Fear" -> sport, "Enjoyment" -> thriller, "Trust" -> western,
            "Surprise" -> film_noir.

    Returns:
        The ``find_all`` result holding every ``<a>`` element of the
        listing page whose ``href`` matches the IMDb title-link pattern.

    Raises:
        ValueError: If ``emotion`` is not a supported name. (Previously
            this surfaced as an UnboundLocalError.)
    """
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Enjoyment": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments; treat them as unsupported.
        raise ValueError("unsupported emotion: %r" % (emotion,)) from None

    urlhere = ('http://www.imdb.com/search/title?genres=' + genre +
               '&title_type=feature&sort=moviemeter, asc')

    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Title links look like /title/tt<digits>/.
    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

示例#5

0

显示文件

def get_movie(emotion):
    """Scrape an IMDb genre listing chosen from the user's mood.

    Args:
        emotion: Mood label. Supported values and the genre requested for
            each: "neutral" -> drama, "negative" -> western,
            "positive" -> sport.

    Returns:
        The ``find_all`` result holding every ``<a>`` element of the
        listing page whose ``href`` matches the IMDb title-link pattern.

    Raises:
        ValueError: If ``emotion`` is not a supported label. (Previously
            this surfaced as an UnboundLocalError.)
    """
    genre_by_mood = {
        "neutral": "drama",
        "negative": "western",
        "positive": "sport",
    }

    try:
        genre = genre_by_mood[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments; treat them as unsupported.
        raise ValueError("unsupported emotion: %r" % (emotion,)) from None

    urlhere = ('http://www.imdb.com/search/title?genres=' + genre +
               '&title_type=feature&sort=moviemeter, asc')

    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Title links look like /title/tt<digits>/.
    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

示例#6

0

显示文件

文件： Movie_Emotions.py 项目： saftab/Movie-Recommender

def main(emotions):
    """Return IMDb title links for the genre paired with ``emotions``.

    Args:
        emotions: Capitalised emotion name. Supported values and the genre
            requested for each: "Sad" -> drama, "Disgust" -> musical,
            "Anger" -> family, "Anticipation" -> thriller.

    Returns:
        The ``find_all`` result holding every ``<a>`` element of the
        listing page whose ``href`` matches the IMDb title-link pattern.

    Raises:
        ValueError: If ``emotions`` is not a supported name. (Previously
            this surfaced as an UnboundLocalError.)
    """
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
    }

    try:
        genre = genre_by_emotion[emotions]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments; treat them as unsupported.
        raise ValueError("unsupported emotion: %r" % (emotions,)) from None

    urlhere = ('http://www.imdb.com/search/title?genres=' + genre +
               '&title_type=feature&sort=moviemeter, asc')

    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Title links look like /title/tt<digits>/.
    return soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

示例#7

0

显示文件

def f4():
    """Print thriller titles from IMDb and publish the first five.

    Only the first 13 matching anchors are examined. An anchor's text is
    printed and collected when its HTML splits into exactly three pieces
    on ``>``. The first five collected titles are then formatted into
    ``results['text']`` (``results`` is defined outside this function).
    An IndexError is raised if fewer than five titles were collected.
    """
    emotion = "Anticipation"
    print("ANTICIPATION MOVIES HAIN")

    page_text = HTTP.get(
        'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'
    ).text
    parsed = SOUP(page_text, "lxml")
    anchors = parsed.find_all(
        "a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

    found = []
    for position, anchor in enumerate(anchors):
        pieces = str(anchor).split('>')
        if len(pieces) == 3:
            # pieces[1] is "<text></a"; dropping the last three characters
            # leaves the anchor text.
            name = pieces[1][:-3]
            print(name)
            found.append(name)
        if position > 11:
            break

    print()
    print()

    # Explicit indexing keeps the IndexError when fewer than five exist.
    top_five = tuple(found[k] for k in range(5))
    listing = ':%s \n * %s \n * %s\n *%s\n *%s\n' % top_five
    results['text'] = 'Anticipation Movies acc.to IMDB' + listing
    found = []

示例#8

0

显示文件

文件： movie.py 项目： chiragm28/simple_movie_recommender

def scrapAndProcess(emotion):
    """Collect movie names from the IMDb listing paired with ``emotion``.

    Args:
        emotion: Lower-case emotion name ("sad", "disgust", "anger",
            "anticipation", "fear", "enjoyment", "trust" or "surprise").

    Returns:
        A list of the link texts found on the listing page, excluding the
        placeholder values "None", "X" and a bare newline. The list is
        empty for an unrecognised emotion. If fetching or parsing fails,
        the exception is printed and whatever was collected so far is
        returned.
    """
    listing_pages = (
        ("sad", 'http://www.imdb.com/search/title?genres=drama&title_type=feature&sort=moviemeter, asc'),
        ("disgust", 'http://www.imdb.com/search/title?genres=musical&title_type=feature&sort=moviemeter, asc'),
        ("anger", 'http://www.imdb.com/search/title?genres=family&title_type=feature&sort=moviemeter, asc'),
        ("anticipation", 'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'),
        ("fear", 'http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc'),
        ("enjoyment", 'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'),
        ("trust", 'http://www.imdb.com/search/title?genres=western&title_type=feature&sort=moviemeter, asc'),
        ("surprise", 'http://www.imdb.com/search/title?genres=film_noir&title_type=feature&sort=moviemeter, asc'),
    )
    # First page whose emotion name equals the argument, else no URL.
    url = next((page for mood, page in listing_pages if emotion == mood), "")

    movies = []
    if not url:
        return movies

    try:
        markup = HTTP.get(url).text
        parsed = SOUP(markup, "lxml")
        skipped = ["None", "X", "\n"]
        title_links = parsed.findAll(
            'a', attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
        for link in title_links:
            label = str(link.string)
            if label not in skipped:
                movies.append(label)
    except Exception as e:
        # Best effort: report the problem and return what we have.
        print(e)

    return movies

示例#9

0

显示文件

文件： name.py 项目： NikilMunireddy/Emotion-movie-recommendation

def main(emotion):
    """Return IMDb title links for the genre paired with ``emotion``.

    Args:
        emotion: Capitalised emotion name. Supported values and the genre
            requested for each: "Sad" -> drama, "Disgust" -> musical,
            "Anger" -> family, "Anticipation" -> thriller,
            "Fear" -> sport, "Enjoyment" -> thriller, "Trust" -> western,
            "Surprise" -> film_noir.

    Returns:
        The ``find_all`` result holding every ``<a>`` element of the
        listing page whose ``href`` matches the IMDb title-link pattern.

    Raises:
        ValueError: If ``emotion`` is not a supported name. (Previously
            this surfaced as an UnboundLocalError.)
    """
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Enjoyment": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments; treat them as unsupported.
        raise ValueError("unsupported emotion: %r" % (emotion,)) from None

    # The query string is joined with plain "&". The earlier literals used
    # the HTML entity "&amp;", which sent "amp;title_type" and "amp;sort"
    # as parameter names instead of "title_type" and "sort".
    urlhere = ('http://www.imdb.com/search/title?genres=' + genre +
               '&title_type=feature&sort=moviemeter, asc')

    response = HTTP.get(urlhere)

    # Parse the downloaded page.
    soup = SOUP(response.text, "lxml")

    # Title links look like /title/tt<digits>/.
    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

示例#10

0

显示文件

def target(emotion):
    """Return IMDb title links for the genre paired with ``emotion``.

    Args:
        emotion: Lower-case emotion name. Supported values and the genre
            requested for each: "disgust" -> musical, "sad" -> drama,
            "trust" -> western, "anger" -> family, "fear" -> sport,
            "anticipation" and "enjoyment" -> thriller,
            "surprise" -> film_noir.

    Returns:
        The ``find_all`` result holding every ``<a>`` element of the
        listing page whose ``href`` matches the IMDb title-link pattern.

    Raises:
        ValueError: If ``emotion`` is not a supported name.
    """
    # A table replaces the old branch chain, whose test
    # ``emotion == "anticipation" or "enjoyment"`` was always truthy: it
    # made "surprise" unreachable and sent every unknown value to the
    # thriller page.
    genre_by_emotion = {
        "disgust": "musical",
        "sad": "drama",
        "trust": "western",
        "anger": "family",
        "fear": "sport",
        "anticipation": "thriller",
        "enjoyment": "thriller",
        "surprise": "film_noir",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments; treat them as unsupported.
        raise ValueError("unsupported emotion: %r" % (emotion,)) from None

    # Plain "&" separators: the earlier literals carried the HTML entity
    # "&amp;", which corrupted the "title_type" and "sort" parameter names.
    url = ('http://www.imdb.com/search/title?genres=' + genre +
           '&title_type=feature&sort=moviemeter, asc')

    response = HTTP.get(url)
    field = SOUP(response.text, "lxml")

    # Title links look like /title/tt<digits>/.
    return field.find_all("a",
                          attrs={"href": regex.compile(r'\/title\/tt+\d*\/')})

示例#11

0

显示文件

def recommend(emotion):
    """Return IMDb title links for the genre paired with ``emotion``.

    Args:
        emotion: Capitalised emotion name. Supported values and the genre
            requested for each: "Sad" -> drama, "Disgust" -> musical,
            "Anger" -> family, "Anticipation" -> thriller,
            "Fear" -> sport, "Enjoyment" -> thriller, "Trust" -> western,
            "Surprise" -> film_noir.

    Returns:
        The ``find_all`` result holding every ``<a>`` element of the
        listing page whose ``href`` matches the IMDb title-link pattern.

    Raises:
        ValueError: If ``emotion`` is not a supported name. (Previously
            this surfaced as an UnboundLocalError.)
    """
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Enjoyment": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments; treat them as unsupported.
        raise ValueError("unsupported emotion: %r" % (emotion,)) from None

    urlhere = ('http://www.imdb.com/search/title?genres=' + genre +
               '&title_type=feature&sort=moviemeter, asc')

    # Download the page and parse it into a tree.
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Title links look like /title/tt<digits>/.
    return soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

示例#12

0

显示文件

def main(emotion):
    """Return IMDb title links from the page paired with ``emotion``.

    Args:
        emotion: Emotion name, matched case-insensitively (the earlier
            code accepted only the capitalised and all-lower spellings).
            Supported names: sad, disgust, anger, anticipation, fear,
            enjoyment, trust, surprise.

    Returns:
        The ``find_all`` result holding every ``<a>`` element of the page
        whose ``href`` matches the IMDb title-link pattern.

    Raises:
        ValueError: If ``emotion`` is not a supported name. (Previously
            this surfaced as an UnboundLocalError.)
    """
    url_by_emotion = {
        "sad": 'https://www.imdb.com/list/ls009576722/',
        "disgust": 'https://www.imdb.com/list/ls075745491/',
        "anger": 'https://www.imdb.com/list/ls000445157/',
        "anticipation": 'https://www.imdb.com/india/upcoming/',
        "fear": 'https://www.imdb.com/list/ls058201636/',
        "enjoyment": 'https://www.imdb.com/list/ls005597767/',
        "trust": 'https://www.imdb.com/list/ls051594496/',
        "surprise": 'https://www.imdb.com/list/ls008944391/',
    }

    # Only strings are case-folded; anything else is looked up as-is and
    # rejected below.
    key = emotion.lower() if isinstance(emotion, str) else emotion
    try:
        urlhere = url_by_emotion[key]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments; treat them as unsupported.
        raise ValueError("unsupported emotion: %r" % (emotion,)) from None

    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "html.parser")

    # Title links look like /title/tt<digits>/.
    return soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

示例#13

0

显示文件

文件： scraper.py 项目： vilavivida/MovieMood

def scrape_IMDB(IMDB, num, folder_path=None):
    """Scrape an IMDb listing page and return the top-ranked entries.

    Args:
        IMDB: URL of the listing page; passed straight to ``requests.get``.
        num: Number of entries to keep from the ranked result.
        folder_path: Effectively ignored -- it is overwritten on the first
            line of the body and not read again in this function.

    Returns:
        A dict made from the first ``num`` items of
        ``rank_movies(IMDB_dict)``. Before ranking, each title in
        ``IMDB_dict`` maps to a list filled in this order: summary,
        grading, runtime, then the weighted rating (the rating is appended
        only for titles that get paired with a ratings bar below).
    """
    # NOTE(review): this assignment discards whatever the caller passed.
    folder_path = "movie_summary/"  # you only need the folder_path when you need to store movie summary
    response = requests.get(IMDB)
    data = SOUP(response.text, 'lxml')

    # we hope to have movie's name, grading, runtime, and rating
    IMDB_dict = {}
    title_lst = []
    num_reviews = []

    # IMDB lists top 50 from each genre

    for movie in data.findAll('div', class_="lister-item-content"):
        # title: take the text between the first '">' and the next '</' of
        # the first anchor whose href looks like /title/tt<digits>/
        title = movie.find("a",
                           attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
        title = str(title).split('">')[1].split('</')[0]
        IMDB_dict[title] = []
        title_lst.append(title)

        # movie summary
        summary = movie.findAll('p', {'class': 'text-muted'})
        # NOTE(review): findAll presumably returns a list-like result, not
        # None, so this check looks always true -- confirm.
        if summary != None:
            # Keep what follows the ', <p class="text-muted">' separator in
            # the list's string form, then drop newlines and the closing
            # "</p>]". Raises IndexError if that separator is absent.
            summary = str(summary).split(
                ', <p class="text-muted">')[1].replace("\n", "").replace(
                    "</p>]", "")  #clean the summary text
            IMDB_dict[title].append(summary)

        # grading (certificate); "Not Found" when the span is missing
        grading = movie.find('span', class_="certificate")
        if grading != None:
            grading = str(grading).split('">')[1].split('</')[0]
        else:
            grading = "Not Found"
        IMDB_dict[title].append(grading)
        # runtime; "Not Found" when the span is missing
        length = movie.find('span', class_="runtime")
        if length != None:
            length = str(length).split('">')[1].split('</')[0]
        else:
            length = "Not Found"
        IMDB_dict[title].append(length)

    # No. of reviewers: paired with titles purely by position. Every digit
    # in the element's text is concatenated into a single integer.
    for title, movie in zip(title_lst,
                            data.findAll('p',
                                         class_="sort-num_votes-visible")):
        numRater = int(re.sub("[^0-9]", "", movie.text))
        num_reviews.append(numRater)

    # rating: review counts, titles and ratings bars are again paired by
    # position, so a listing entry without a ratings bar would shift the
    # pairing -- NOTE(review): confirm the page always has one per entry.
    for review, title, movie in zip(num_reviews, title_lst,
                                    data.findAll('div', class_="ratings-bar")):
        rating = movie.find('div', class_="inline-block ratings-imdb-rating")
        try:
            # Fourth space-separated token of the tag's HTML; first try a
            # decimal number such as "7.5" ...
            rating = float(
                re.search(r'[\d]*[.][\d]+',
                          str(rating).split(' ')[3]).group())
        except AttributeError:
            # ... and fall back to a whole number when the decimal search
            # found nothing (re.search returned None).
            rating = float(
                re.search(r'\d+',
                          str(rating).split(' ')[3]).group())

        # Score adjustment by number of reviewers: multiply the rating by
        # log(log(review, 5), 10). The original note called this "logistic
        # regression"; the code itself only applies nested logarithms.
        # NOTE(review): assumes `log` is math.log(x, base) -- confirm.

        weightedRating = rating * log(log(review, 5), 10)
        weightedRating = round(weightedRating, 1)

        IMDB_dict[title].append(weightedRating)

    # rank_movies is defined elsewhere; only its first `num` items are kept.
    ranked_dict = rank_movies(IMDB_dict)
    ranked_dict = dict(list(ranked_dict.items())[0:num])

    # print(ranked_dict)

    return ranked_dict

示例#14

0

显示文件

    engine = pyttsx3.init()
    engine.setProperty(
        'voice',
        'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0'
    )
    engine.say(y)
    engine.setProperty('rate', 1)  #120 words per minute
    engine.setProperty('volume', 1)
    engine.runAndWait()


# Announce the task, then download and parse the NDTV top-stories page.
Say('getting news headlines')
urlhere = ('https://www.ndtv.com/top-stories')
response = HTTP.get(urlhere)
data = response.text
soup = SOUP(data, "lxml")

# Truncate MyFile.txt by opening it for writing and writing an empty string.
file = open('MyFile.txt', 'w')
file.write('')
file.close()

# Reopen the same file in append mode.
# NOTE(review): no matching close() for file1 is visible in this excerpt.
file1 = open("MyFile.txt", "a")
i = 1

# Visit every <h2> whose class attribute matches 'nstory_header'.
for title in soup.findAll('h2', attrs={"class": re.compile('nstory_header')}):
    print(title.string)
    if i <= 7:
        # Replace the tag with its text, then split it into words.
        title = title.string

        title = title.split()
        str1 = '+'

示例#15

0

显示文件

文件： real_time_video.py 项目： silpasreeni99/movie-recommendation-system-using-emotion-recognition

        def main(emotion):
            """Print up to ten IMDb titles with ratings for a detected emotion.

            Args:
                emotion: Emotion label, matched case-insensitively.
                    Supported labels and the genre requested for each:
                    "sad" -> drama, "disgust" -> musical,
                    "angry" -> family, "neutral" -> thriller,
                    "scared" -> sport, "happy" -> thriller,
                    "surprised" -> film_noir.

            Returns:
                The ``find_all`` result holding every
                ``<h3 class="lister-item-header">`` element of the page.

            Raises:
                ValueError: If the label is not supported. (Previously
                    this surfaced as an UnboundLocalError.)
            """
            em = emotion.lower()
            genre_by_emotion = {
                "sad": "drama",
                "disgust": "musical",
                "angry": "family",
                "neutral": "thriller",
                "scared": "sport",
                "happy": "thriller",
                "surprised": "film_noir",
            }
            if em not in genre_by_emotion:
                raise ValueError("unsupported emotion: %r" % (emotion,))

            print("EMOTION DETECTED:",em)
            urlhere = ('http://www.imdb.com/search/title?genres=' +
                       genre_by_emotion[em] +
                       '&title_type=feature&sort=moviemeter, asc')

            # Download the whole page and parse it.
            response = HTTP.get(urlhere)
            soup = SOUP(response.text, "lxml")

            title1 = soup.find_all("h3",{"class":"lister-item-header"})
            rating = soup.find_all("div", {"class": "inline-block ratings-imdb-rating"})

            print("LIST OF APT MOVIES BASED ON USERS CURRENT EMOTION:")
            # Pair headers with ratings by position and stop at ten. A page
            # with fewer entries prints what is there instead of raising
            # IndexError as the hard-coded indices did.
            for header, score in zip(title1[:10], rating[:10]):
                print(header.text)
                print("rating=",score.text)
            return title1

示例#16

0

显示文件

文件： scraper.py 项目： vilavivida/MovieMood

def scrape_rt(RT, num):
    """Scrape a Rotten Tomatoes listing and return the top-ranked entries.

    Args:
        RT: URL of the listing page; passed straight to ``requests.get``.
        num: Number of ranked entries to keep and to look up individually.

    Returns:
        A dict made from the first ``num`` items of
        ``rank_movies(RT_dict)``. Each value starts as
        ``[profile link, weighted rating]``; the link is then removed and
        the summary, rating label and runtime found on the movie's own
        page are inserted at indexes 0, 1 and 2 respectively.
    """
    response = requests.get(RT)
    data = SOUP(response.text, 'lxml')
    RT_dict = {}
    title_lst = []
    rel_lst = []
    reviews_lst = []

    # Rotten Tomatoes lists top 100 from each genre

    # as above, we hope to obtain name, grading, runtime, and rating
    for movie in data.findAll('tr'):
        # title: text after the first '">' of the anchor, cut at " (" and
        # stripped of surrounding newlines/whitespace
        title = movie.find("a", class_="unstyled articleLink")
        if title != None:
            cleanTitle = str(title).split('">')[1].split(" (")[0].strip(
                '\n').strip()
            RT_dict[cleanTitle] = []
            title_lst.append(cleanTitle)  #100

            # link to movie profile
            # NOTE(review): if the href already starts with "/", the
            # joined URL contains a double slash -- confirm it resolves.
            rel_link = str(title).split('href="')[1].split('">\n')[0]
            link = "https://www.rottentomatoes.com/" + rel_link
            RT_dict[cleanTitle].append(link)

        # numbers of reviews:
        num_reviews = movie.find('td', class_="right hidden-xs")
        if num_reviews != None:
            num_reviews = int(
                str(num_reviews).split('">')[1].split('</')[0])  #100

            # collect number of reviewers for later movie score adjustments
            reviews_lst.append(num_reviews)

    # rating: review counts, titles and score icons are paired by position
    for review, title, movie in zip(
            reviews_lst, title_lst,
            data.findAll('span', class_='tMeterIcon tiny')):
        rating = movie.find('span', class_="tMeterScore")
        # keep the number between the non-breaking space and the "%" sign
        rating = str(rating).split('">\xa0')[1].split('%</')[0]
        # transform RT rating into the same scale as IMDB rating (out of 10)
        weightedRating = int(rating) / 10

        # score adjustments: multiply by log(log(review, 4), 5)
        # NOTE(review): assumes `log` is math.log(x, base) -- confirm.
        weightedRating = weightedRating * log(log(review, 4), 5)
        weightedRating = round(weightedRating, 1)
        RT_dict[title].append(weightedRating)

    # to increase the efficiency of the script,
    # we are going to rank movies based on rating
    # and only look up movie profiles of top-ranked movies

    ranked_dict = rank_movies(RT_dict)
    ranked_dict = dict(list(ranked_dict.items())[0:num])
    # Move each profile link (first item of the value) into rel_lst.
    for value in ranked_dict.values():
        rel_lst.append(value[0])
        value.pop(0)

    new_title_lst = list(ranked_dict.keys())

    # # grading and runtime information are inside movie profile links

    for title, link in zip(new_title_lst, rel_lst):
        response = requests.get(link)
        data_1 = SOUP(response.text, 'lxml')

        #movie summary: inserted at the front of the value list
        for div_tag in data_1.findAll(
                'div', {'class': 'movie_synopsis clamp clamp-6 js-clamp'}):
            summary = str(div_tag.text).replace("\n", "")
            ranked_dict[title].insert(0, summary)

        # Walk the metadata rows and pick out the "Rating:" and "Runtime:"
        # entries, stripping newlines and spaces from their values.
        for div_tag in data_1.findAll('li', {'class': 'meta-row clearfix'}):
            movie_label = div_tag.find('div', {
                'class': 'meta-label subtle'
            }).text
            if movie_label == "Rating:":
                rating_info = div_tag.find('div', {'class': 'meta-value'}).text
                rating_info = rating_info.replace("\n", "").replace(" ", "")
                ranked_dict[title].insert(1, rating_info)
            elif movie_label == "Runtime:":
                runtime_info = div_tag.find('div', {
                    'class': 'meta-value'
                }).text
                runtime_info = runtime_info.replace("\n", "").replace(" ", "")

                ranked_dict[title].insert(2, runtime_info)

    return ranked_dict

示例#17

0

显示文件

文件： ImdbWebScrape.py 项目： gsamba92/Movie-Recommendation-application-based-on-sentiment-Analysis

def getMovies(emotion):
    """Scrape names, poster URLs and ratings for the genre paired with ``emotion``.

    Args:
        emotion: Capitalised emotion label. Supported labels and the genre
            requested for each: "Sad" -> drama, "Disgust" -> musical,
            "Angry" -> family, "Anticipation" -> thriller,
            "Surprised" -> sport, "Happy" -> thriller,
            "Trust" -> western, "Surprise" -> film_noir.

    Returns:
        A list of ``(name, image URL, rating)`` tuples, or an empty list
        when ``emotion`` is not a supported label.
    """
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Angry": "family",
        "Anticipation": "thriller",
        "Surprised": "sport",
        "Happy": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }

    genre = genre_by_emotion.get(emotion)
    if genre is None:
        return []

    # The query string is joined with plain "&". The earlier literals used
    # the HTML entity "&amp;", which sent "amp;title_type" and "amp;sort"
    # as parameter names instead of "title_type" and "sort".
    urlhere = ('http://www.imdb.com/search/title?genres=' + genre +
               '&title_type=feature&sort=moviemeter, asc')

    # Download the whole page and parse it.
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    samples = soup.find_all("div", "lister-item")
    ratings = soup.find_all("div", "ratings-bar")

    name = []
    img = []
    for a in samples:
        # Fixed child positions inside each listing entry: the title text
        # and the poster's 'loadlate' attribute.
        name.append(a.contents[5].contents[1].contents[3].text)
        img.append(a.contents[3].contents[1].contents[1].attrs['loadlate'])

    rating = [rate.contents[1].attrs['data-value'] for rate in ratings]

    # NOTE(review): ratings are collected separately from the entries, so
    # an entry without a ratings bar would shift this positional pairing.
    return list(zip(name, img, rating))

示例#18

0

显示文件

文件： movieRecommendation.py 项目： rohan-gupta/Emotional-Guru

def recommendation(emotion):
    """Build a short movie list for ``emotion`` from a local file and IMDb.

    Args:
        emotion: Lower-case emotion name: "sad", "disgust", "anger",
            "anticipation", "fear", "enjoyment", "trust" or "surprise".

    Returns:
        A list that starts with up to four lines read from the emotion's
        file under ``Recommendation/movies/`` (line endings are kept),
        followed by the link texts extracted from the first four title
        anchors of the matching IMDb genre listing.

    Raises:
        ValueError: If ``emotion`` is not a supported name. (Previously
            this surfaced as an UnboundLocalError.)
    """
    # emotion -> (local file with picks, IMDb genre to scrape)
    sources = {
        'sad': ('comedy.txt', 'drama'),
        'disgust': ('romance.txt', 'musical'),
        'anger': ('fantasy.txt', 'family'),
        'anticipation': ('mystery.txt', 'thriller'),
        'fear': ('animation.txt', 'sport'),
        'enjoyment': ('western.txt', 'thriller'),
        'trust': ('music.txt', 'western'),
        'surprise': ('horror.txt', 'film_noir'),
    }

    try:
        filename, genre = sources[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments; treat them as unsupported.
        raise ValueError("unsupported emotion: %r" % (emotion,)) from None

    movies = []

    # The context manager closes the file; the earlier code never did.
    with open('Recommendation/movies/' + filename, 'r') as handle:
        for index, line in enumerate(handle):
            movies.append(line)
            if index > 2:
                break

    urlhere = ('http://www.imdb.com/search/title?genres=' + genre +
               '&title_type=feature&sort=moviemeter,asc')
    response = http.get(urlhere)

    soup = SOUP(response.text, "lxml")
    title = soup.find_all("a",
                          attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

    # The earlier code had two byte-identical loops chosen by emotion;
    # one loop covers both.
    for count, anchor in enumerate(title):
        pieces = str(anchor).split('>')
        if len(pieces) == 3:
            # pieces[1] is "<text></a"; dropping the last three characters
            # leaves the anchor text.
            movies.append(pieces[1][:-3])
        if count > 2:
            break

    return movies

示例#19

0

显示文件

def extract_emote(urlhere):
    """Fetch ``urlhere`` and return its body parsed into a soup object.

    The response text is handed to ``SOUP`` with the ``"lxml"`` parser.
    """
    page_text = HTTP.get(urlhere).text
    return SOUP(page_text, "lxml")

示例#20

0

显示文件

def Output(Page1, genre, fgc, bgc):
    """Replace ``Page1`` with a results page listing IMDb movies of ``genre``.

    Destroys ``Page1``, builds a new canvas on ``root``, downloads the IMDb
    genre search page and renders one row per ``lister-item-content`` block
    (rank, title truncated to 35 characters, year, rating) in a read-only
    scrolled text widget, together with "Back" and "Quit" buttons.

    Args:
        Page1: The widget currently shown; it is destroyed.
        genre: Genre slug inserted into the IMDb search URL and the heading.
        fgc: Foreground colour of the results text area.
        bgc: Background colour of the results text area.
    """
    Page1.destroy()
    Page2 = Canvas(root)
    Page2.configure(bg='#cce6ff')

    heading = "TOP 50 MOVIES OF " + genre.upper() + " GENRE ARE :"
    head = Label(Page2,
                 text=heading,
                 fg="black",
                 bg='#1affff',
                 height=2,
                 width=50,
                 font=("Times New Roman", 16, 'bold'))
    head.grid(row=0, column=0, columnspan=5)

    # Download the genre listing and parse the HTML with the lxml parser.
    url = 'https://www.imdb.com/search/title/?genres=' + genre + '&explore=title_type,genres&pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=3396781f-d87f-4fac-8694-c56ce6f490fe&pf_rd_r=HJGEZ0WGPGEPNBKSJH8A&pf_rd_s=center-1&pf_rd_t=15051&pf_rd_i=genre&ref_=ft_gnr_pr1_i_3'
    response = HTTP.get(url)
    soup = SOUP(response.text, "lxml")
    movies = soup.find_all('div', {'class': 'lister-item-content'})

    # Collect the table pieces and join once instead of growing a string.
    rows = [
        "RANK\tMOVIE NAME\t\t\t\tYEAR\t        RATING\n",
        '-' * 5 + '\t' + '-' * 35 + '\t\t\t\t' + '-' * 7 + '\t        ' +
        '-' * 10 + '\n',
    ]
    missing = "---"
    for ind, movie in enumerate(movies, start=1):
        # str() of a tag list looks like '[<a href="...">Text</a>, ...]', so
        # splitting on '>' leaves the first tag's text (plus the start of its
        # closing tag) at index 1.  An empty result list stringifies to '[]',
        # which has no index 1 - fall back to a placeholder instead of
        # raising IndexError.
        title_parts = str(movie.find_all('a')).split('>')
        year_parts = str(
            movie.find_all(
                'span',
                {'class': 'lister-item-year text-muted unbold'})).split('>')
        rating_parts = str(movie.find_all('strong')).split('>')

        # The slices drop the trailing '</a' / '</span' fragments.
        title_final = title_parts[1][:-3] if len(title_parts) > 1 else missing
        year_final = year_parts[1][:-6] if len(year_parts) > 1 else missing
        rating_final = (rating_parts[1][:3]
                        if len(rating_parts) > 1 else missing)

        rows.append(str(ind) + "\t" + title_final[:35] + "\t\t\t\t" +
                    year_final + "\t\t" + rating_final + "\n")
    movies_display = "".join(rows)

    display = ScrolledText(Page2,
                           width=60,
                           height=25,
                           bg=bgc,
                           fg=fgc,
                           font=("Comic Sans MS", 12))
    display.insert(END, movies_display)
    display.grid(row=1, column=0, columnspan=5, rowspan=5, padx=10, pady=10)
    # Make the listing read-only once it has been filled in.
    display.configure(state=DISABLED)

    back_button = Button(text="Back",
                         fg="white",
                         bg='#003366',
                         height=1,
                         width=8,
                         font=("Comic Sans MS", 14),
                         command=lambda: Input(Page2))
    Page2.create_window(10, 690, anchor=SW, window=back_button)

    end_button = Button(text="Quit",
                        fg="white",
                        bg='#660000',
                        height=1,
                        width=8,
                        font=("Comic Sans MS", 14),
                        command=Finish)
    Page2.create_window(630, 690, anchor=SE, window=end_button)

    Page2.pack()

示例#21

0

显示文件

        def main(emotion):
            """Show IMDb recommendations for a detected emotion in Tk windows.

            The emotion is lower-cased, shown in an info dialog, mapped to an
            IMDb genre (announced in a second dialog), and the genre's search
            page is downloaded.  Up to ten title/rating pairs from the page
            are listed in a Treeview window.

            Args:
                emotion: Emotion label; matching is case-insensitive.

            Returns:
                The ``h3.lister-item-header`` elements found on the page.

            Raises:
                ValueError: If the emotion has no genre mapping (previously
                    this surfaced as an UnboundLocalError).
            """
            import tkinter as tk
            from tkinter import messagebox, ttk

            # Lower-cased emotion -> (dialog text, IMDb genre slug).
            genre_by_emotion = {
                "sad": ('DRAMA......', 'drama'),
                "disgust": ('MUSICAL........', 'musical'),
                "angry": ('FAMILY.......', 'family'),
                "neutral": ('THRILLER........', 'thriller'),
                "scared": ('SPORT.........', 'sport'),
                "happy": ('THRILLER.........', 'thriller'),
                "surprised": ('FILM_NOIR.........', 'film_noir'),
            }

            em = emotion.lower()

            # NOTE(review): every dialog gets its own hidden Tk root, as in
            # the original code; sharing a single root may be preferable.
            root = tk.Tk()
            root.withdraw()
            messagebox.showinfo('EMOTION', em.upper())

            if em not in genre_by_emotion:
                raise ValueError("no genre is mapped to emotion %r" % (em,))
            genre_label, genre_slug = genre_by_emotion[em]

            root11 = tk.Tk()
            root11.withdraw()
            messagebox.showinfo('GENRE ALLOCATED', genre_label)

            urlhere = ('http://www.imdb.com/search/title?genres=' +
                       genre_slug +
                       '&title_type=feature&sort=moviemeter, asc')

            # Download the listing and parse it with BeautifulSoup.
            response = HTTP.get(urlhere)
            soup = SOUP(response.text, "lxml")

            title1 = soup.find_all("h3", {"class": "lister-item-header"})
            rating = soup.find_all(
                "div", {"class": "inline-block ratings-imdb-rating"})

            root12 = tk.Tk()
            root12.geometry("600x600")
            root12.title("MOVIES RECOMMENDED FOR DETECTED EMOTION")
            tree = ttk.Treeview(root12)
            tree["columns"] = ("one", "two")
            tree.column("one", width=200)
            tree.column("two", width=200)

            style = ttk.Style(root12)
            style.configure('Treeview', rowheight=45)

            tree.heading("one", text="MOVIES")
            tree.heading("two", text="RATINGS")
            # Pair the i-th header with the i-th rating, top to bottom, and
            # stop at ten rows or when either list runs out, so a short page
            # no longer raises IndexError.
            for header, score in zip(title1[:10], rating[:10]):
                tree.insert("",
                            "end",
                            text="",
                            values=(header.text, score.text))
            tree.pack()

            return title1

示例#22

0

显示文件

def scrapAndProcess(emotion):
    """Return the movie names scraped from IMDb for ``emotion``.

    The emotion is matched (exact, case-sensitive) against a fixed table of
    emotion -> IMDb genre.  An unmatched emotion yields an empty list without
    any request being made.  Any exception raised while fetching or parsing
    is printed and whatever was collected so far is returned.
    """
    # (emotion, IMDb genre slug) pairs, checked in this order.
    genre_for_emotion = (
        ("sad", "drama"),
        ("disgust", "musical"),
        ("anger", "family"),
        ("anticipation", "thriller"),
        ("fear", "sport"),
        ("enjoyment", "thriller"),
        ("trust", "western"),
        ("surprise", "film_noir"),
    )
    slug = next(
        (genre for name, genre in genre_for_emotion if emotion == name),
        None)

    movies = []
    if slug is None:
        # Not one of the known emotions: nothing to look up.
        return movies

    url = ('http://www.imdb.com/search/title?genres=' + slug +
           '&title_type=feature&sort=moviemeter, asc')

    # Link texts that are page noise rather than movie names.
    noise = ("None", "X", "\n")

    try:
        page = SOUP(HTTP.get(url).text, "lxml")
        anchors = page.findAll(
            'a', attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
        for anchor in anchors:
            # .string is a NavigableString (or None); normalise to plain str.
            name = str(anchor.string)
            if name not in noise:
                movies.append(name)
    except Exception as e:
        # Best effort: report the failure and return what we have.
        print(e)

    return movies

示例#23

0

显示文件

文件： actions.py 项目： shreyasdvrj/MovieBot_MiniProject

        def main(emotion):
            """Return the IMDb title links from the page mapped to ``emotion``.

            Args:
                emotion: One of the capitalised emotion labels below, or the
                    empty string for the top-rated chart.

            Returns:
                All ``<a>`` elements on the fetched page whose ``href``
                matches the ``/title/tt.../`` pattern.

            Raises:
                ValueError: If ``emotion`` has no page mapped to it
                    (previously this surfaced as an UnboundLocalError).
            """
            url_by_emotion = {
                # Comedy features sorted by US box-office gross.
                "Sad":
                'https://www.imdb.com/search/title/?title_type=feature&genres=comedy&sort=boxoffice_gross_us,desc&explore=genres',
                # IMDb list sorted by user rating.
                "Happy":
                'https://www.imdb.com/list/ls068773014/?sort=user_rating,desc&st_dt=&mode=detail&page=1',
                # Action features released in 2019.
                "Excitement":
                'https://www.imdb.com/search/title/?count=100&genres=action&release_date=2019,2019&title_type=feature',
                # Musical features.
                "Disgust":
                'http://www.imdb.com/search/title?genres=musical&title_type=feature&sort=moviemeter, asc',
                # IMDb list sorted by user rating.
                "Anger":
                'https://www.imdb.com/list/ls076036380/?sort=user_rating,desc&st_dt=&mode=detail&page=1',
                # Sport features.
                "Fear":
                'http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc',
                # Thriller features.
                "Enjoyment":
                'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc',
                # No emotion entered: top-rated chart.
                "":
                'https://www.imdb.com/chart/top?ref_=nv_mv_250',
                # Western features.
                "Trust":
                'http://www.imdb.com/search/title?genres=western&title_type=feature&sort=moviemeter, asc',
                # Film-noir features.
                "Surprise":
                'http://www.imdb.com/search/title?genres=film_noir&title_type=feature&sort=moviemeter, asc',
            }

            try:
                urlhere = url_by_emotion[emotion]
            except (KeyError, TypeError):
                # Fail with a clear message instead of an unbound local.
                raise ValueError(
                    "no IMDb page is mapped to emotion %r" %
                    (emotion,)) from None

            # Download the page and parse it with BeautifulSoup.
            response = HTTP.get(urlhere)
            soup = SOUP(response.text, "lxml")

            # Title links are the anchors whose href looks like /title/tt123/.
            return soup.find_all(
                "a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

示例#24

0

显示文件

文件： backend.py 项目： mikpim01/Movie_Recommender

# Print the first 24 entries of ``genres`` (defined outside this excerpt),
# five per row, separated by two tabs.
c = 0  # number of genres printed on the current row
for r in range(24):
    print(genres[r],end="\t\t")
    c+=1
    if c==5:
        # Row is full: reset the counter and start a new line.
        c = 0
        print()
print("\n"+"-"*30)
### ---- end of genre menu ----

# Ask the user for a genre and build the IMDb genre search URL from it.
genre = input("Enter a genre to search : ")
url = 'https://www.imdb.com/search/title/?genres='+genre+'&explore=title_type,genres&pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=3396781f-d87f-4fac-8694-c56ce6f490fe&pf_rd_r=HJGEZ0WGPGEPNBKSJH8A&pf_rd_s=center-1&pf_rd_t=15051&pf_rd_i=genre&ref_=ft_gnr_pr1_i_3'

response = HTTP.get(url)        ## fetch the whole page
data = response.text
soup = SOUP(data,"lxml")        ## parse the page with the lxml parser

# One 'lister-item-content' div per movie in the listing.
movies = soup.find_all('div',{'class':'lister-item-content'})

## ---- print table header ----
print("-"*30)
print("S.No\tName |||  Year  |||  Rating")
print("-"*30)
## ----------------------------
ind = 1  # serial-number counter; not yet used in the visible part of the loop
for movie in movies:
    # str() of the tag list is split on '>' so the tag text can be sliced
    # out of the fragments; the excerpt ends before they are used.
    title = movie.find_all('a')
    title = str(title).split('>')

    year = movie.find_all('span',{'class':'lister-item-year text-muted unbold'})
    year = str(year).split('>')

示例#25

0

显示文件

文件： movie_review.py 项目： swapnilbaluja/Movie-Recommender

def main(emotion):
    """Print and return IMDb titles for the genre mapped to ``emotion``.

    Args:
        emotion: One of "Sad", "Disgust", "Anger", "Anticipation", "Fear",
            "Enjoyment", "Trust" or "Surprise" (case-sensitive).

    Returns:
        All ``<a>`` elements whose ``href`` matches the ``/title/tt.../``
        pattern, or ``None`` (after printing "Wrong emotion") when the
        emotion is not recognised.
    """
    # Emotion -> IMDb genre slug.
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Enjoyment": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }

    # Non-string input can never match a label, so treat it as unknown
    # rather than letting an unhashable value raise TypeError.
    genre = genre_by_emotion.get(emotion) if isinstance(emotion, str) else None
    if genre is None:
        print("Wrong emotion")
        return

    # The query string must use plain '&' separators.  The previous
    # HTML-escaped '&amp;' turned 'title_type' and 'sort' into parameters
    # named 'amp;title_type' / 'amp;sort'.
    urlhere = ('http://www.imdb.com/search/title?genres=' + genre +
               '&title_type=feature&sort=moviemeter, asc')

    # Download the page and parse it with BeautifulSoup.
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Title links are the anchors whose href looks like /title/tt123/.
    title = soup.find_all(
        "a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

    print('Best Movies For Gener  '+emotion+'  are:')
    # Number each listing header and print the text of its first link.
    headers = soup.find_all("h3", attrs={'class': 'lister-item-header'})
    for var, t in enumerate(headers, start=1):
        print(str(var)+" "+t.find('a').contents[0])

    return title