def f1():
    """Print movie titles scraped from IMDb's drama feature-film search page.

    Only the first 13 matching links are inspected, and a link is printed
    only when its HTML splits into exactly three pieces on ``>`` (a plain
    ``<a ...>Title</a>`` element). Two blank lines are printed at the end.
    """
    print("SAD MOVIES HAIN")
    page = HTTP.get(
        'http://www.imdb.com/search/title?genres=drama&title_type=feature&sort=moviemeter, asc'
    )
    soup = SOUP(page.text, "lxml")
    links = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

    for position, link in enumerate(links):
        pieces = str(link).split('>')
        if len(pieces) == 3:
            # pieces[1] is "Title</a"; drop the trailing "</a".
            print(pieces[1][:-3])
        # Stop after the link at index 12 has been handled.
        if position > 11:
            break

    print()
    print()
def main(emotion):
    """Fetch the IMDb search page mapped to a numeric emotion code.

    Args:
        emotion: Integer code selecting the search URL; the supported
            codes are the keys of ``urls`` below (1 to 6).

    Returns:
        The ``soup.find_all`` result: every ``<a>`` element on the fetched
        page whose ``href`` matches the ``/title/tt.../`` pattern.

    Raises:
        ValueError: If ``emotion`` is not a supported code. Previously this
            surfaced as an ``UnboundLocalError`` on ``urlhere``.
    """
    urls = {
        # genres=family
        1: 'http://www.imdb.com/search/title?genres=family&title_type=feature&sort=moviemeter, asc',
        # genres=drama
        2: 'http://www.imdb.com/search/title?genres=drama&title_type=feature&sort=moviemeter, asc',
        # genres=sport
        3: 'http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc',
        # genres=musical
        4: 'http://www.imdb.com/search/title?genres=musical&title_type=feature&sort=moviemeter, asc',
        # NOTE(review): "happy" does not look like an IMDb genre name - confirm.
        5: 'https://www.imdb.com/search/title/?genres=happy&title_type=feature&sort=moviemeter, asc',
        # genres=comedy
        6: 'https://www.imdb.com/search/title/?genres=comedy&title_type=feature&sort=moviemeter, asc',
    }
    urlhere = urls.get(emotion)
    if urlhere is None:
        raise ValueError("unsupported emotion code: {!r}".format(emotion))

    # HTTP request to get the data of the whole page
    response = HTTP.get(urlhere)
    data = response.text

    # Parsing the data using BeautifulSoup
    soup = SOUP(data, "lxml")

    # Extract movie title links from the data using regex
    title = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return title
def main(emotion):
    """Return the IMDb title links for the genre mapped to an emotion name.

    Args:
        emotion: One of "sad", "disgust", "anger", "anticipation", "fear",
            "enjoyment", "trust" or "surprise" (lowercase, matched exactly).

    Returns:
        The ``soup.find_all`` result: every ``<a>`` element on the fetched
        page whose ``href`` matches the ``/title/tt.../`` pattern.

    Raises:
        ValueError: If ``emotion`` is not one of the names above. Previously
            this surfaced as an ``UnboundLocalError`` on ``urlhere``.
    """
    # Emotion name -> value of the "genres" query parameter.
    genre_by_emotion = {
        "sad": "drama",
        "disgust": "musical",
        "anger": "family",
        "anticipation": "thriller",
        "fear": "sport",
        "enjoyment": "thriller",
        "trust": "western",
        "surprise": "film_noir",
    }
    genre = genre_by_emotion.get(emotion)
    if genre is None:
        raise ValueError("unsupported emotion: {!r}".format(emotion))
    urlhere = ('http://www.imdb.com/search/title?genres=%s'
               '&title_type=feature&sort=moviemeter, asc' % genre)

    # HTTP request to get the data of the whole page
    response = HTTP.get(urlhere)
    data = response.text

    # Parsing the data using BeautifulSoup
    soup = SOUP(data, "lxml")

    # Extract movie title links from the data using regex
    title = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return title
def main(emotion):
    """Return the IMDb title links for the genre mapped to an emotion name.

    Args:
        emotion: One of "Sad", "Disgust", "Anger", "Anticipation", "Fear",
            "Enjoyment", "Trust" or "Surprise" (capitalised, matched
            exactly).

    Returns:
        The ``soup.find_all`` result: every ``<a>`` element on the fetched
        page whose ``href`` matches the ``/title/tt.../`` pattern.

    Raises:
        ValueError: If ``emotion`` is not one of the names above. Previously
            this surfaced as an ``UnboundLocalError`` on ``urlhere``.
    """
    # Emotion name -> value of the "genres" query parameter.
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Enjoyment": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }
    genre = genre_by_emotion.get(emotion)
    if genre is None:
        raise ValueError("unsupported emotion: {!r}".format(emotion))
    urlhere = ('http://www.imdb.com/search/title?genres=%s'
               '&title_type=feature&sort=moviemeter, asc' % genre)

    response = HTTP.get(urlhere)
    data = response.text
    soup = SOUP(data, "lxml")
    title = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return title
def get_movie(emotion):
    """Scrape IMDb title links for the genre mapped to a sentiment label.

    Args:
        emotion: "neutral", "negative" or "positive" (matched exactly).

    Returns:
        The ``soup.find_all`` result: every ``<a>`` element on the fetched
        page whose ``href`` matches the ``/title/tt.../`` pattern.

    Raises:
        ValueError: If ``emotion`` is not one of the three labels.
            Previously this surfaced as an ``UnboundLocalError`` on
            ``urlhere``.
    """
    # Sentiment label -> value of the "genres" query parameter.
    genre_by_emotion = {
        "neutral": "drama",
        "negative": "western",
        "positive": "sport",
    }
    genre = genre_by_emotion.get(emotion)
    if genre is None:
        raise ValueError("unsupported emotion: {!r}".format(emotion))
    urlhere = ('http://www.imdb.com/search/title?genres=%s'
               '&title_type=feature&sort=moviemeter, asc' % genre)

    response = HTTP.get(urlhere)
    data = response.text
    soup = SOUP(data, "lxml")
    title = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return title
def main(emotions):
    """Return the IMDb title links for the genre mapped to an emotion name.

    Args:
        emotions: A single emotion name: "Sad", "Disgust", "Anger" or
            "Anticipation" (matched exactly).

    Returns:
        The ``soup.find_all`` result: every ``<a>`` element on the fetched
        page whose ``href`` matches the ``/title/tt.../`` pattern.

    Raises:
        ValueError: If ``emotions`` is not one of the names above.
            Previously this surfaced as an ``UnboundLocalError`` on
            ``urlhere``.
    """
    # Emotion name -> value of the "genres" query parameter.
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
    }
    genre = genre_by_emotion.get(emotions)
    if genre is None:
        raise ValueError("unsupported emotion: {!r}".format(emotions))
    urlhere = ('http://www.imdb.com/search/title?genres=%s'
               '&title_type=feature&sort=moviemeter, asc' % genre)

    response = HTTP.get(urlhere)
    data = response.text
    soup = SOUP(data, "lxml")
    title = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return title
def f4():
    """Print thriller titles from IMDb and publish the first five via ``results``.

    Only the first 13 matching links are inspected. Every link whose HTML
    splits into exactly three pieces on ``>`` is printed and collected.
    The first five collected titles are then formatted into
    ``results['text']``.

    Raises:
        IndexError: If fewer than five titles were collected.
    """
    print("ANTICIPATION MOVIES HAIN")
    page = HTTP.get(
        'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'
    )
    soup = SOUP(page.text, "lxml")
    links = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

    names = []
    for position, link in enumerate(links):
        pieces = str(link).split('>')
        if len(pieces) == 3:
            # pieces[1] is "Title</a"; drop the trailing "</a".
            name = pieces[1][:-3]
            print(name)
            names.append(name)
        # Stop after the link at index 12 has been handled.
        if position > 11:
            break

    print()
    print()

    bullet_list = ':%s \n * %s \n * %s\n *%s\n *%s\n' % (
        names[0], names[1], names[2], names[3], names[4])
    results['text'] = 'Anticipation Movies acc.to IMDB' + bullet_list
def scrapAndProcess(emotion):
    """Return movie names scraped from the IMDb genre page for ``emotion``.

    An emotion that is not in the table yields an empty list without any
    network access. Any exception raised while fetching or parsing is
    printed, and whatever was collected so far is returned.
    """
    url_table = (
        ("sad", 'http://www.imdb.com/search/title?genres=drama&title_type=feature&sort=moviemeter, asc'),
        ("disgust", 'http://www.imdb.com/search/title?genres=musical&title_type=feature&sort=moviemeter, asc'),
        ("anger", 'http://www.imdb.com/search/title?genres=family&title_type=feature&sort=moviemeter, asc'),
        ("anticipation", 'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'),
        ("fear", 'http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc'),
        ("enjoyment", 'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'),
        ("trust", 'http://www.imdb.com/search/title?genres=western&title_type=feature&sort=moviemeter, asc'),
        ("surprise", 'http://www.imdb.com/search/title?genres=film_noir&title_type=feature&sort=moviemeter, asc'),
    )
    url = ""
    for label, candidate in url_table:
        if emotion == label:
            url = candidate
            break

    movies = []
    if not url:
        return movies

    try:
        page = HTTP.get(url)
        soup = SOUP(page.text, "lxml")
        # Link texts that are not real movie names.
        skipped = ["None", "X", "\n"]
        anchors = soup.findAll(
            'a', attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
        for anchor in anchors:
            label = str(anchor.string)
            if label not in skipped:
                movies.append(label)
    except Exception as e:
        print(e)
    return movies
def main(emotion):
    """Return the IMDb title links for the genre mapped to an emotion name.

    Args:
        emotion: One of "Sad", "Disgust", "Anger", "Anticipation", "Fear",
            "Enjoyment", "Trust" or "Surprise" (capitalised, matched
            exactly).

    Returns:
        The ``soup.find_all`` result: every ``<a>`` element on the fetched
        page whose ``href`` matches the ``/title/tt.../`` pattern.

    Raises:
        ValueError: If ``emotion`` is not one of the names above. Previously
            this surfaced as an ``UnboundLocalError`` on ``urlhere``.
    """
    # Emotion name -> value of the "genres" query parameter.
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Enjoyment": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }
    genre = genre_by_emotion.get(emotion)
    if genre is None:
        raise ValueError("unsupported emotion: {!r}".format(emotion))
    urlhere = ('http://www.imdb.com/search/title?genres=%s'
               '&title_type=feature&sort=moviemeter, asc' % genre)

    response = HTTP.get(urlhere)
    data = response.text

    # Parsing the data using BeautifulSoup
    soup = SOUP(data, "lxml")

    # Extract movie title links from the data using regex
    title = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return title
def target(emotion):
    """Return the IMDb title links for the genre mapped to an emotion name.

    Args:
        emotion: One of "disgust", "sad", "trust", "anger", "fear",
            "anticipation", "enjoyment" or "surprise" (lowercase, matched
            exactly).

    Returns:
        The ``find_all`` result: every ``<a>`` element on the fetched page
        whose ``href`` matches the ``/title/tt.../`` pattern.

    Raises:
        ValueError: If ``emotion`` is not one of the names above.
    """
    # The original test `emotion == "anticipation" or "enjoyment"` was always
    # truthy (a non-empty string literal), so "surprise" could never be
    # reached and every unknown emotion silently got the thriller page.
    # An explicit table makes each emotion select its own genre.
    genre_by_emotion = {
        "disgust": "musical",
        "sad": "drama",
        "trust": "western",
        "anger": "family",
        "fear": "sport",
        "anticipation": "thriller",
        "enjoyment": "thriller",
        "surprise": "film_noir",
    }
    genre = genre_by_emotion.get(emotion)
    if genre is None:
        raise ValueError("unsupported emotion: {!r}".format(emotion))
    url = ('http://www.imdb.com/search/title?genres=%s'
           '&title_type=feature&sort=moviemeter, asc' % genre)

    response = HTTP.get(url)
    data = response.text
    field = SOUP(data, "lxml")

    # Regex extraction of title links
    title = field.find_all("a", attrs={"href": regex.compile(r'\/title\/tt+\d*\/')})
    return title
def recommend(emotion):
    """Return the IMDb title links for the genre mapped to an emotion name.

    Args:
        emotion: One of "Sad", "Disgust", "Anger", "Anticipation", "Fear",
            "Enjoyment", "Trust" or "Surprise" (capitalised, matched
            exactly).

    Returns:
        The ``soup.find_all`` result: every ``<a>`` element on the fetched
        page whose ``href`` matches the ``/title/tt.../`` pattern.

    Raises:
        ValueError: If ``emotion`` is not one of the names above. Previously
            this surfaced as an ``UnboundLocalError`` on ``urlhere``.
    """
    # Emotion name -> value of the "genres" query parameter.
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Enjoyment": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }
    genre = genre_by_emotion.get(emotion)
    if genre is None:
        raise ValueError("unsupported emotion: {!r}".format(emotion))
    urlhere = ('http://www.imdb.com/search/title?genres=%s'
               '&title_type=feature&sort=moviemeter, asc' % genre)

    # Web scraping to get the HTML
    response = HTTP.get(urlhere)
    info = response.text

    # Parsing to form the tree
    soup = SOUP(info, "lxml")
    name = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return name
def main(emotion):
    """Return the title links from the curated IMDb page for an emotion.

    Args:
        emotion: An emotion name, either all lowercase or capitalised
            (e.g. "sad" or "Sad"). Supported names are the keys of
            ``url_by_emotion`` below.

    Returns:
        The ``soup.find_all`` result: every ``<a>`` element on the fetched
        page whose ``href`` matches the ``/title/tt.../`` pattern.

    Raises:
        ValueError: If ``emotion`` is not a supported name. Previously this
            surfaced as an ``UnboundLocalError`` on ``urlhere``.
    """
    url_by_emotion = {
        "sad": 'https://www.imdb.com/list/ls009576722/',
        "disgust": 'https://www.imdb.com/list/ls075745491/',
        "anger": 'https://www.imdb.com/list/ls000445157/',
        "anticipation": 'https://www.imdb.com/india/upcoming/',
        "fear": 'https://www.imdb.com/list/ls058201636/',
        "enjoyment": 'https://www.imdb.com/list/ls005597767/',
        "trust": 'https://www.imdb.com/list/ls051594496/',
        "surprise": 'https://www.imdb.com/list/ls008944391/',
    }
    # Accept exactly the two spellings the original accepted: lowercase and
    # capitalised. Other casings (e.g. "SAD") remain unsupported.
    lookup = dict(url_by_emotion)
    lookup.update(
        (name.capitalize(), url) for name, url in url_by_emotion.items())

    urlhere = lookup.get(emotion)
    if urlhere is None:
        raise ValueError("unsupported emotion: {!r}".format(emotion))

    response = HTTP.get(urlhere)
    data = response.text
    soup = SOUP(data, "html.parser")
    title = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return title
def scrape_IMDB(IMDB, num, folder_path=None):
    """Scrape an IMDb listing page and return the ``num`` top-ranked movies.

    Args:
        IMDB: URL of the IMDb listing page to fetch.
        num: Number of entries to keep after ranking.
        folder_path: Folder for stored movie summaries. It is not used
            inside this function; it defaults to ``"movie_summary/"`` when
            omitted.

    Returns:
        A dict mapping movie title to
        ``[summary, grading, runtime, weighted_rating]`` (entries are
        appended in that order), limited to the first ``num`` items of
        whatever ``rank_movies`` returns.
        NOTE(review): the ordering is defined by ``rank_movies``, which is
        not visible here - confirm.
    """
    # Only apply the default when the caller did not supply a folder; the
    # original overwrote the argument unconditionally.
    if folder_path is None:
        folder_path = "movie_summary/"

    response = requests.get(IMDB)
    data = SOUP(response.text, 'lxml')

    # We hope to have the movie's name, grading, runtime, and rating.
    IMDB_dict = {}
    title_lst = []
    num_reviews = []

    for movie in data.find_all('div', class_="lister-item-content"):
        # Title: text between the opening <a ...> tag and its closing tag.
        title = movie.find("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
        title = str(title).split('">')[1].split('</')[0]
        IMDB_dict[title] = []
        title_lst.append(title)

        # Movie summary. find_all always returns a list, so this guard is
        # always taken; it is kept only to mirror the original structure.
        summary = movie.find_all('p', {'class': 'text-muted'})
        if summary is not None:
            # Take the second "text-muted" paragraph and clean its text.
            summary = str(summary).split(
                ', <p class="text-muted">')[1].replace("\n", "").replace(
                    "</p>]", "")
            IMDB_dict[title].append(summary)

        # Grading (certificate)
        grading = movie.find('span', class_="certificate")
        if grading is not None:
            grading = str(grading).split('">')[1].split('</')[0]
        else:
            grading = "Not Found"
        IMDB_dict[title].append(grading)

        # Runtime
        length = movie.find('span', class_="runtime")
        if length is not None:
            length = str(length).split('">')[1].split('</')[0]
        else:
            length = "Not Found"
        IMDB_dict[title].append(length)

    # Number of reviewers: keep only the digits of each vote paragraph.
    # Zipping with title_lst caps the count at the number of titles.
    for _, movie in zip(title_lst,
                        data.find_all('p', class_="sort-num_votes-visible")):
        numRater = int(re.sub("[^0-9]", "", movie.text))
        num_reviews.append(numRater)

    # Rating
    for review, title, movie in zip(
            num_reviews, title_lst, data.find_all('div', class_="ratings-bar")):
        rating = movie.find('div', class_="inline-block ratings-imdb-rating")
        rating_text = str(rating).split(' ')[3]
        try:
            # Decimal rating such as "7.8".
            rating = float(re.search(r'[\d]*[.][\d]+', rating_text).group())
        except AttributeError:
            # No decimal point matched; fall back to an integer rating.
            rating = float(re.search(r'\d+', rating_text).group())

        # Scale the rating by a double logarithm of the reviewer count.
        weightedRating = rating * log(log(review, 5), 10)
        weightedRating = round(weightedRating, 1)
        IMDB_dict[title].append(weightedRating)

    ranked_dict = rank_movies(IMDB_dict)
    ranked_dict = dict(list(ranked_dict.items())[0:num])
    return ranked_dict
engine = pyttsx3.init() engine.setProperty( 'voice', 'HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0' ) engine.say(y) engine.setProperty('rate', 1) #120 words per minute engine.setProperty('volume', 1) engine.runAndWait() Say('getting news headlines') urlhere = ('https://www.ndtv.com/top-stories') response = HTTP.get(urlhere) data = response.text soup = SOUP(data, "lxml") file = open('MyFile.txt', 'w') file.write('') file.close() file1 = open("MyFile.txt", "a") i = 1 for title in soup.findAll('h2', attrs={"class": re.compile('nstory_header')}): print(title.string) if i <= 7: title = title.string title = title.split() str1 = '+'
def main(emotion):
    """Print and return the top IMDb listings for a detected emotion.

    Args:
        emotion: Emotion name, case-insensitive. Supported names (after
            lowercasing) are "sad", "disgust", "angry", "neutral",
            "scared", "happy" and "surprised".

    Returns:
        The ``find_all`` result for the ``h3.lister-item-header`` elements
        of the fetched page.

    Raises:
        ValueError: If the emotion is not supported. Previously this
            surfaced as an ``UnboundLocalError`` on ``urlhere``.
    """
    em = emotion.lower()

    # Lowercased emotion -> value of the "genres" query parameter.
    genre_by_emotion = {
        "sad": "drama",
        "disgust": "musical",
        "angry": "family",
        "neutral": "thriller",
        "scared": "sport",
        "happy": "thriller",
        "surprised": "film_noir",
    }
    genre = genre_by_emotion.get(em)
    if genre is None:
        raise ValueError("unsupported emotion: {!r}".format(emotion))

    print("EMOTION DETECTED:", em)
    urlhere = ('http://www.imdb.com/search/title?genres=%s'
               '&title_type=feature&sort=moviemeter, asc' % genre)

    # HTTP request to get the data of the whole page
    response = HTTP.get(urlhere)
    data = response.text

    # Parsing the data using BeautifulSoup
    soup = SOUP(data, "lxml")

    title1 = soup.find_all("h3", {"class": "lister-item-header"})
    rating = soup.find_all("div", {"class": "inline-block ratings-imdb-rating"})

    print("LIST OF APT MOVIES BASED ON USERS CURRENT EMOTION:")
    # Show at most ten title/rating pairs, paired by position. Short result
    # sets no longer raise IndexError; only the available pairs are printed.
    for heading, score in zip(title1[:10], rating[:10]):
        print(heading.text)
        print("rating=", score.text)
    return title1
def scrape_rt(RT, num):
    """Scrape a Rotten Tomatoes listing and return the ``num`` top-ranked movies.

    The listing page supplies title, profile link, review count and score.
    After ranking, each remaining movie's profile page is fetched for its
    summary, rating label and runtime.

    Args:
        RT: URL of the Rotten Tomatoes listing page to fetch.
        num: Number of entries to keep after ranking.

    Returns:
        A dict mapping movie title to a list. The profile link is removed
        from the front, then summary, rating info and runtime are inserted
        at indices 0, 1 and 2 when the profile page provides them; the
        weighted score stays at the end.
        NOTE(review): the ordering is defined by ``rank_movies``, which is
        not visible here - confirm.
    """
    response = requests.get(RT)
    data = SOUP(response.text, 'lxml')
    RT_dict = {}
    title_lst = []
    rel_lst = []
    reviews_lst = []

    # As for IMDb, we hope to obtain name, grading, runtime, and rating.
    for movie in data.find_all('tr'):
        # Title
        title = movie.find("a", class_="unstyled articleLink")
        if title is not None:
            cleanTitle = str(title).split('">')[1].split(" (")[0].strip(
                '\n').strip()
            RT_dict[cleanTitle] = []
            title_lst.append(cleanTitle)

            # Link to the movie profile
            rel_link = str(title).split('href="')[1].split('">\n')[0]
            link = "https://www.rottentomatoes.com/" + rel_link
            RT_dict[cleanTitle].append(link)

        # Number of reviews, collected for the later score adjustment
        num_reviews = movie.find('td', class_="right hidden-xs")
        if num_reviews is not None:
            num_reviews = int(
                str(num_reviews).split('">')[1].split('</')[0])
            reviews_lst.append(num_reviews)

    # Rating
    for review, title, movie in zip(
            reviews_lst, title_lst,
            data.find_all('span', class_='tMeterIcon tiny')):
        rating = movie.find('span', class_="tMeterScore")
        rating = str(rating).split('">\xa0')[1].split('%</')[0]

        # Transform the percentage into the same 0-10 scale as IMDb.
        weightedRating = int(rating) / 10

        # Scale the score by a double logarithm of the review count.
        weightedRating = weightedRating * log(log(review, 4), 5)
        weightedRating = round(weightedRating, 1)
        RT_dict[title].append(weightedRating)

    # Rank first so that only the top-ranked movies' profile pages are
    # fetched below.
    ranked_dict = rank_movies(RT_dict)
    ranked_dict = dict(list(ranked_dict.items())[0:num])

    # Move each profile link out of the value list and into rel_lst.
    for value in ranked_dict.values():
        rel_lst.append(value[0])
        value.pop(0)
    new_title_lst = list(ranked_dict.keys())

    # Grading and runtime information are inside the movie profile pages.
    for title, link in zip(new_title_lst, rel_lst):
        response = requests.get(link)
        data_1 = SOUP(response.text, 'lxml')

        # Movie summary
        for div_tag in data_1.find_all(
                'div', {'class': 'movie_synopsis clamp clamp-6 js-clamp'}):
            summary = str(div_tag.text).replace("\n", "")
            ranked_dict[title].insert(0, summary)

        for div_tag in data_1.find_all('li', {'class': 'meta-row clearfix'}):
            movie_label = div_tag.find('div', {
                'class': 'meta-label subtle'
            }).text
            if movie_label == "Rating:":
                rating_info = div_tag.find('div', {'class': 'meta-value'}).text
                rating_info = rating_info.replace("\n", "").replace(" ", "")
                ranked_dict[title].insert(1, rating_info)
            elif movie_label == "Runtime:":
                runtime_info = div_tag.find('div', {
                    'class': 'meta-value'
                }).text
                runtime_info = runtime_info.replace("\n", "").replace(" ", "")
                ranked_dict[title].insert(2, runtime_info)
    return ranked_dict
def getMovies(emotion):
    """Return ``(name, image, rating)`` tuples from the IMDb page for ``emotion``.

    An emotion that is not in the table yields an empty list without any
    network access. Otherwise the mapped search page is fetched, and name
    and image are read from each ``lister-item`` div and the rating value
    from each ``ratings-bar`` div, then zipped together by position.
    """
    url_table = (
        ("Sad", 'http://www.imdb.com/search/title?genres=drama&title_type=feature&sort=moviemeter, asc'),
        ("Disgust", 'http://www.imdb.com/search/title?genres=musical&title_type=feature&sort=moviemeter, asc'),
        ("Angry", 'http://www.imdb.com/search/title?genres=family&title_type=feature&sort=moviemeter, asc'),
        ("Anticipation", 'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'),
        ("Surprised", 'http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc'),
        ("Happy", 'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'),
        ("Trust", 'http://www.imdb.com/search/title?genres=western&title_type=feature&sort=moviemeter, asc'),
        ("Surprise", 'http://www.imdb.com/search/title?genres=film_noir&title_type=feature&sort=moviemeter, asc'),
    )
    urlhere = "No url"
    for label, candidate in url_table:
        if emotion == label:
            urlhere = candidate
            break

    if urlhere == "No url":
        return []

    # Fetch the whole page and parse it with BeautifulSoup.
    page = HTTP.get(urlhere)
    soup = SOUP(page.text, "lxml")
    samples = soup.find_all("div", "lister-item")
    ratings = soup.find_all("div", "ratings-bar")

    names = []
    posters = []
    for item in samples:
        # Positional navigation into the listing markup.
        names.append(item.contents[5].contents[1].contents[3].text)
        posters.append(item.contents[3].contents[1].contents[1].attrs['loadlate'])

    scores = []
    for bar in ratings:
        scores.append(bar.contents[1].attrs['data-value'])

    return list(zip(names, posters, scores))
def recommendation(emotion):
    """Build a movie list for ``emotion`` from a local file plus IMDb.

    The first four lines of the emotion's file under
    ``Recommendation/movies/`` are added as read (line endings included),
    followed by the titles found among the first four matching links of
    the mapped IMDb search page.

    Args:
        emotion: One of "sad", "disgust", "anger", "anticipation", "fear",
            "enjoyment", "trust" or "surprise" (matched exactly).

    Returns:
        A list of strings: file lines first, then scraped titles.

    Raises:
        ValueError: If ``emotion`` is not one of the names above. Previously
            this surfaced as an ``UnboundLocalError`` on ``urlhere``.
    """
    # emotion -> (local recommendation file, IMDb "genres" query value)
    sources = {
        'sad': ('comedy.txt', 'drama'),
        'disgust': ('romance.txt', 'musical'),
        'anger': ('fantasy.txt', 'family'),
        'anticipation': ('mystery.txt', 'thriller'),
        'fear': ('animation.txt', 'sport'),
        'enjoyment': ('western.txt', 'thriller'),
        'trust': ('music.txt', 'western'),
        'surprise': ('horror.txt', 'film_noir'),
    }
    source = sources.get(emotion)
    if source is None:
        raise ValueError("unsupported emotion: {!r}".format(emotion))
    filename, genre = source

    movies = []
    # The context manager closes the file; the original left it open.
    with open('Recommendation/movies/' + filename, 'r') as handle:
        movies.extend(handle.readlines()[:4])

    urlhere = ('http://www.imdb.com/search/title?genres=%s'
               '&title_type=feature&sort=moviemeter,asc' % genre)
    response = http.get(urlhere)
    data = response.text
    soup = SOUP(data, "lxml")
    title = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

    # The original had two identical branches here, so one loop suffices.
    # Only the first four links are inspected.
    for anchor in title[:4]:
        tmp = str(anchor).split('>')
        if len(tmp) == 3:
            # tmp[1] is "Title</a"; drop the trailing "</a".
            movies.append(tmp[1][:-3])
    return movies
def extract_emote(urlhere):
    """Fetch ``urlhere`` and return its body parsed by ``SOUP`` with lxml."""
    return SOUP(HTTP.get(urlhere).text, "lxml")
def Output(Page1, genre, fgc, bgc):
    """Replace ``Page1`` with a results page listing IMDb movies for ``genre``.

    Destroys ``Page1``, builds a new canvas on ``root``, fetches the IMDb
    genre search page, and shows one row per ``lister-item-content`` div
    (rank, title cut to 35 characters, year, rating or ``---``) in a
    read-only scrolled text box. "Back" and "Quit" buttons are placed on
    the canvas.

    Args:
        Page1: The widget currently shown; it is destroyed.
        genre: Genre name, inserted into the search URL and the heading.
        fgc: Foreground colour of the text box.
        bgc: Background colour of the text box.
    """
    Page1.destroy()

    Page2 = Canvas(root)
    Page2.configure(bg='#cce6ff')

    head = Label(Page2,
                 text="TOP 50 MOVIES OF " + genre.upper() + " GENRE ARE :",
                 fg="black",
                 bg='#1affff',
                 height=2,
                 width=50,
                 font=("Times New Roman", 16, 'bold'))
    head.grid(row=0, column=0, columnspan=5)

    # Main core of the application: table header first, then one row per movie.
    rows = [
        "RANK\tMOVIE NAME\t\t\t\tYEAR\t RATING\n",
        '-' * 5 + '\t' + '-' * 35 + '\t\t\t\t' + '-' * 7 + '\t ' + '-' * 10 + '\n',
    ]

    url = ('https://www.imdb.com/search/title/?genres=' + genre +
           '&explore=title_type,genres&pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=3396781f-d87f-4fac-8694-c56ce6f490fe&pf_rd_r=HJGEZ0WGPGEPNBKSJH8A&pf_rd_s=center-1&pf_rd_t=15051&pf_rd_i=genre&ref_=ft_gnr_pr1_i_3')
    page = HTTP.get(url)  # whole page content
    soup = SOUP(page.text, "lxml")

    entries = soup.find_all('div', {'class': 'lister-item-content'})
    for rank, entry in enumerate(entries, 1):
        # Each field is pulled from the string form of a find_all result,
        # split on '>'.
        title = str(entry.find_all('a')).split('>')
        year = str(entry.find_all(
            'span', {'class': 'lister-item-year text-muted unbold'})).split('>')
        rating = str(entry.find_all('strong')).split('>')
        title_final = title[1][:-3]
        rows.append("{}\t{}\t\t\t\t{}\t\t{}\n".format(
            rank,
            title_final[:35],
            year[1][:-6],
            # A single piece means no <strong> element was found.
            "---" if len(rating) == 1 else rating[1][:3],
        ))
    movies_display = "".join(rows)

    display = ScrolledText(Page2,
                           width=60,
                           height=25,
                           bg=bgc,
                           fg=fgc,
                           font=("Comic Sans MS", 12))
    display.insert(END, movies_display)
    display.grid(row=1, column=0, columnspan=5, rowspan=5, padx=10, pady=10)
    display.configure(state=DISABLED)

    back_button = Button(text="Back",
                         fg="white",
                         bg='#003366',
                         height=1,
                         width=8,
                         font=("Comic Sans MS", 14),
                         command=lambda: Input(Page2))
    Page2.create_window(10, 690, anchor=SW, window=back_button)

    end_button = Button(text="Quit",
                        fg="white",
                        bg='#660000',
                        height=1,
                        width=8,
                        font=("Comic Sans MS", 14),
                        command=Finish)
    Page2.create_window(630, 690, anchor=SE, window=end_button)

    Page2.pack()
def main(emotion):
    """Show the detected emotion, its genre, and a table of recommended movies.

    Pops up two message boxes (emotion, allocated genre), fetches the
    mapped IMDb search page, and fills a Treeview with up to ten
    title/rating pairs.

    Args:
        emotion: Emotion name, case-insensitive. Supported names (after
            lowercasing) are "sad", "disgust", "angry", "neutral",
            "scared", "happy" and "surprised".

    Returns:
        The ``find_all`` result for the ``h3.lister-item-header`` elements
        of the fetched page.

    Raises:
        ValueError: If the emotion is not supported. It is raised before
            any window is created. Previously an unsupported emotion
            surfaced as an ``UnboundLocalError`` on ``urlhere`` after the
            first message box.
    """
    em = emotion.lower()

    # Lowercased emotion -> (message-box label, "genres" query value)
    genres = {
        "sad": ('DRAMA......', 'drama'),
        "disgust": ('MUSICAL........', 'musical'),
        "angry": ('FAMILY.......', 'family'),
        "neutral": ('THRILLER........', 'thriller'),
        "scared": ('SPORT.........', 'sport'),
        "happy": ('THRILLER.........', 'thriller'),
        "surprised": ('FILM_NOIR.........', 'film_noir'),
    }
    choice = genres.get(em)
    if choice is None:
        raise ValueError("unsupported emotion: {!r}".format(emotion))
    genre_label, genre = choice
    urlhere = ('http://www.imdb.com/search/title?genres=%s'
               '&title_type=feature&sort=moviemeter, asc' % genre)

    import tkinter as tk
    from tkinter import messagebox, ttk

    # Hidden root window so that only the message box is visible.
    root = tk.Tk()
    root.withdraw()
    messagebox.showinfo('EMOTION', em.upper())

    root11 = tk.Tk()
    root11.withdraw()
    messagebox.showinfo('GENRE ALLOCATED', genre_label)

    # HTTP request to get the data of the whole page
    response = HTTP.get(urlhere)
    data = response.text

    # Parsing the data using BeautifulSoup
    soup = SOUP(data, "lxml")
    title1 = soup.find_all("h3", {"class": "lister-item-header"})
    rating = soup.find_all(
        "div", {"class": "inline-block ratings-imdb-rating"})

    root12 = tk.Tk()
    root12.geometry("600x600")
    root12.title("MOVIES RECOMMENDED FOR DETECTED EMOTION")
    tree = ttk.Treeview(root12)
    tree["columns"] = ("one", "two")
    tree.column("one", width=200)
    tree.column("two", width=200)
    style = ttk.Style(root12)
    style.configure('Treeview', rowheight=45)
    tree.heading("one", text="MOVIES")
    tree.heading("two", text="RATINGS")

    # Up to ten rows, paired by position. Each row is inserted at index 0,
    # so iterating in reverse leaves the first result at the top. Short
    # result sets no longer raise IndexError.
    rows = list(zip(title1, rating))[:10]
    for heading, score in reversed(rows):
        tree.insert("", 0, text="", values=(heading.text, score.text))
    tree.pack()
    return title1
def scrapAndProcess(emotion):
    """Return movie names scraped from the IMDb genre page mapped to *emotion*.

    The emotion is matched after trimming surrounding whitespace and
    lower-casing, so "Sad" and " sad " select the same page as "sad".

    Args:
        emotion: One of sad, disgust, anger, anticipation, fear, enjoyment,
            trust, surprise.  Any other value (including non-strings) is
            treated as unknown.

    Returns:
        A list of movie-name strings.  The list is empty for an unknown
        emotion, and may be empty or partial if fetching or parsing fails
        (the error is printed rather than raised).
    """
    # Emotion -> IMDb genre slug used in the search URL.
    genre_by_emotion = {
        "sad": "drama",
        "disgust": "musical",
        "anger": "family",
        "anticipation": "thriller",
        "fear": "sport",
        "enjoyment": "thriller",
        "trust": "western",
        "surprise": "film_noir",
    }

    # List to store all movie names.
    movies = []

    key = emotion.strip().lower() if isinstance(emotion, str) else None
    genre = genre_by_emotion.get(key)

    # If the entered emotion is not one of the above, return the empty list
    # without making any request.
    if genre is None:
        return movies

    # URL to which the GET request will be made.
    url = ('http://www.imdb.com/search/title?genres=' + genre
           + '&title_type=feature&sort=moviemeter, asc')

    # Pruning noisy data - these values can appear in place of movie names.
    flags = {"None", "X", "\n"}

    # Deliberate best-effort boundary: a failed request or parse must not
    # terminate the caller, so the error is printed and whatever was
    # collected so far is returned.
    try:
        # HTTP request to get the data of the whole page.
        response = HTTP.get(url)
        # Parsing the response text using BeautifulSoup.
        soup = SOUP(response.text, "lxml")

        # Extract movie titles from anchors whose href matches a title link.
        for anchor in soup.find_all(
                'a', attrs={"href": re.compile(r'\/title\/tt+\d*\/')}):
            # Converting from bs4 NavigableString (or None) to a plain string.
            movieName = str(anchor.string)
            if movieName not in flags:
                movies.append(movieName)
    except Exception as e:
        print(e)

    return movies
def main(emotion):
    """Fetch the IMDb page mapped to *emotion* and return its title links.

    Args:
        emotion: Exact, case-sensitive emotion name ("Sad", "Happy",
            "Excitement", "Disgust", "Anger", "Fear", "Enjoyment", "Trust",
            "Surprise"), or the empty string for the Top Rated chart.

    Returns:
        The ``<a>`` elements whose ``href`` matches an IMDb title link, or an
        empty list when the emotion is not recognised.
    """
    url_by_emotion = {
        # Comedy features sorted by US box-office gross.
        "Sad": 'https://www.imdb.com/search/title/?title_type=feature&genres=comedy&sort=boxoffice_gross_us,desc&explore=genres',
        # IMDb list ls068773014, sorted by user rating.
        "Happy": 'https://www.imdb.com/list/ls068773014/?sort=user_rating,desc&st_dt=&mode=detail&page=1',
        # Action features released in 2019 (100 per page).
        "Excitement": 'https://www.imdb.com/search/title/?count=100&genres=action&release_date=2019,2019&title_type=feature',
        # Musical features.
        "Disgust": 'http://www.imdb.com/search/title?genres=musical&title_type=feature&sort=moviemeter, asc',
        # IMDb list ls076036380, sorted by user rating.
        "Anger": 'https://www.imdb.com/list/ls076036380/?sort=user_rating,desc&st_dt=&mode=detail&page=1',
        # Sport features.
        "Fear": 'http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc',
        # Thriller features.
        "Enjoyment": 'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc',
        # No emotion entered: Top Rated Movies chart.
        "": 'https://www.imdb.com/chart/top?ref_=nv_mv_250',
        # Western features.
        "Trust": 'http://www.imdb.com/search/title?genres=western&title_type=feature&sort=moviemeter, asc',
        # Film-noir features.
        "Surprise": 'http://www.imdb.com/search/title?genres=film_noir&title_type=feature&sort=moviemeter, asc',
    }

    urlhere = url_by_emotion.get(emotion)
    # Unknown emotion: nothing to fetch.  Return an empty result rather than
    # failing later on an unbound URL variable.
    if urlhere is None:
        return []

    # HTTP request to get the data of the whole page.
    response = HTTP.get(urlhere)
    data = response.text

    # Parsing the data using BeautifulSoup.
    soup = SOUP(data, "lxml")

    # Extract movie title links from the data using a regex on the href.
    title = soup.find_all(
        "a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return title
# Local import keeps this script fragment self-contained.
from urllib.parse import quote

# Print up to the first 24 genre names, five per row.  Slicing (instead of
# indexing 0..23) avoids an IndexError when fewer than 24 genres are defined.
c = 0
for name in genres[:24]:
    print(name, end="\t\t")
    c += 1
    if c == 5:
        c = 0
        print()
print("\n"+"-"*30)
###--------------
genre = input("Enter a genre to search : ")
# Percent-encode the user's text so characters such as '&', '#' or spaces
# cannot alter the rest of the query string; ',' is kept as-is.
url = 'https://www.imdb.com/search/title/?genres='+quote(genre.strip(), safe=",")+'&explore=title_type,genres&pf_rd_m=A2FGELUUNOQJNL&pf_rd_p=3396781f-d87f-4fac-8694-c56ce6f490fe&pf_rd_r=HJGEZ0WGPGEPNBKSJH8A&pf_rd_s=center-1&pf_rd_t=15051&pf_rd_i=genre&ref_=ft_gnr_pr1_i_3'
response = HTTP.get(url)  ## fetch the whole page content
data = response.text
soup = SOUP(data, "lxml")  ## parse the page text with the "lxml" parser
movies = soup.find_all('div', {'class': 'lister-item-content'})
##---print header----
print("-"*30)
print("S.No\tName ||| Year ||| Rating")
print("-"*30)
##-------------------
ind = 1
# NOTE(review): the visible loop only splits the anchor and year markup of
# each entry; no row is printed and `ind` is never advanced here.
for movie in movies:
    title = movie.find_all('a')
    title = str(title).split('>')
    year = movie.find_all('span', {'class': 'lister-item-year text-muted unbold'})
    year = str(year).split('>')
def main(emotion):
    """Print the movies IMDb lists for the genre mapped to *emotion*.

    Args:
        emotion: Exact, case-sensitive emotion name: "Sad", "Disgust",
            "Anger", "Anticipation", "Fear", "Enjoyment", "Trust" or
            "Surprise".

    Returns:
        The ``<a>`` elements whose ``href`` matches an IMDb title link, or
        ``None`` (after printing "Wrong emotion") for any other value.
    """
    # Emotion -> IMDb genre slug used in the search URL.
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Enjoyment": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }

    genre = genre_by_emotion.get(emotion)
    if genre is None:
        print("Wrong emotion")
        return

    urlhere = ('http://www.imdb.com/search/title?genres=' + genre
               + '&title_type=feature&sort=moviemeter, asc')

    # HTTP request to get the data of the whole page.
    response = HTTP.get(urlhere)
    data = response.text

    # Parsing the data using BeautifulSoup.
    soup = SOUP(data, "lxml")

    # Extract movie title links from the data using a regex on the href.
    title = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

    print('Best Movies For Gener '+emotion+' are:')
    var = 1
    for t in soup.find_all("h3", attrs={'class': 'lister-item-header'}):
        link = t.find('a')
        # Skip headers with no anchor or an empty one instead of crashing.
        # The counter only advances for rows that are actually printed.
        if link is None or not link.contents:
            continue
        print(str(var) + " " + str(link.contents[0]))
        var = var + 1
    return title