Пример #1
0
def mars_news(browser):
    """Scrape the first headline and teaser from the NASA Mars news page.

    Args:
        browser: A browser driver exposing ``visit``,
            ``is_element_present_by_css`` and ``html`` (presumably a
            splinter ``Browser`` -- confirm against the caller).

    Returns:
        A ``(news_title, news_p)`` tuple of strings, or ``(None, None)``
        when the expected slide markup is missing from the page.
    """
    browser.visit('https://mars.nasa.gov/news/')

    # Give the page up to one second to render the slide list.
    browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

    parsed_page = SOUP(browser.html, 'html.parser')

    # Any missing element yields None, and calling a method on None raises
    # AttributeError -- that is treated as "nothing to report".
    try:
        first_slide = parsed_page.select_one("ul.item_list li.slide")
        title_div = first_slide.find("div", class_='content_title')
        news_title = title_div.get_text()
        teaser_div = first_slide.find("div", class_="article_teaser_body")
        news_p = teaser_div.get_text()
    except AttributeError:
        return None, None

    return news_title, news_p
Пример #2
0
    def main(emotion):
        """Fetch the IMDb page mapped to ``emotion`` and return its title links.

        Args:
            emotion: One of "Sad", "Anticipation", "Fear", "Enjoyment",
                "Trust", "Romantic" or "Comedy" (exact spelling).

        Returns:
            The result of ``soup.find_all`` for every ``<a>`` whose ``href``
            matches ``/title/tt<digits>/``.

        Raises:
            ValueError: If ``emotion`` is not one of the supported labels
                (previously this surfaced as an UnboundLocalError).
        """
        urls_by_emotion = {
            "Sad": "http://www.imdb.com/search/title?genres=drama&title_type=feature&sort=moviemeter, asc",
            "Anticipation": "https://www.imdb.com/search/title/?genres=sci-fi",
            "Fear": "http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc",
            "Enjoyment": "http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc",
            "Trust": "http://www.imdb.com/search/title?genres=western&title_type=feature&sort=moviemeter, asc",
            "Romantic": "https://www.imdb.com/search/title/?genres=romance",
            "Comedy": "https://www.imdb.com/search/title/?genres=comedy",
        }

        try:
            urlhere = urls_by_emotion[emotion]
        except (KeyError, TypeError):
            # TypeError covers unhashable arguments such as lists.
            urlhere = None
        if urlhere is None:
            raise ValueError("Unsupported emotion: {!r}".format(emotion))

        response = HTTP.get(urlhere)
        soup = SOUP(response.text, "lxml")
        return soup.find_all(
            "a", attrs={"href": re.compile(r"\/title\/tt+\d*\/")})
Пример #3
0
def f4():
    """Print scraped thriller titles and store the first five in ``results``.

    Fetches the IMDb thriller search page, examines at most the first 13
    matching ``<a>`` tags, prints each extracted title and writes a
    formatted summary of the first five into ``results['text']``
    (``results`` is a mapping defined outside this function).

    Raises:
        IndexError: If fewer than five titles were extracted.
    """
    print("ANTICIPATION MOVIES HAIN")
    urlhere = 'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")
    anchors = soup.find_all("a",
                            attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

    names = []
    for index, anchor in enumerate(anchors):
        pieces = str(anchor).split('>')
        # Exactly three pieces means a plain '<a ...>text</a>' tag; the
        # middle piece is 'text</a', so drop its last three characters.
        if len(pieces) == 3:
            name = pieces[1][:-3]
            print(name)
            names.append(name)
        # Stop after the anchor at index 12 has been handled.
        if index > 11:
            break
    print()
    print()

    results[
        'text'] = 'Anticipation Movies acc.to IMDB' + ':%s \n * %s \n * %s\n *%s\n *%s\n' % (
            names[0], names[1], names[2], names[3], names[4])
Пример #4
0
def featured_image(browser):
    """Return the absolute URL of the featured JPL Mars image.

    Args:
        browser: A browser driver exposing ``visit``, ``find_by_id``,
            ``is_element_present_by_text``, ``links`` and ``html``
            (presumably a splinter ``Browser`` -- confirm against the caller).

    Returns:
        The image URL prefixed with ``https://www.jpl.nasa.gov``, or ``None``
        when the expected ``figure.lede a img`` element is not on the page.
    """
    browser.visit('http://www.jpl.nasa.gov/spaceimages/?search=&category=Mars')

    # Open the full-size view of the featured image.
    browser.find_by_id('full_image').click()

    # Wait up to one second for the "more info" link, then follow it.
    browser.is_element_present_by_text('more info', wait_time=1)
    browser.links.find_by_partial_text('more info').click()

    detail_page = SOUP(browser.html, 'html.parser')

    # select_one returns None when nothing matches; .get on None raises
    # AttributeError, which signals "no image found".
    try:
        relative_src = detail_page.select_one('figure.lede a img').get("src")
    except AttributeError:
        return None

    return f'https://www.jpl.nasa.gov{relative_src}'
Пример #5
0
def main(emotion):
    """Fetch the IMDb page mapped to ``emotion`` and return its title links.

    Args:
        emotion: One of "Sad", "Excitement", "Disgust", "Anger", "Fear",
            "Enjoyment", "Trust", "Surprise", or the empty string (which
            selects the top-rated chart).

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.

    Raises:
        ValueError: If ``emotion`` is not a supported label (previously this
            surfaced as an UnboundLocalError).
    """
    urls_by_emotion = {
        # Curated list (original note: comedy-drama); contents not
        # verifiable from the URL alone.
        "Sad": 'https://www.imdb.com/list/ls052109630/',
        # Action features released in 2019.
        "Excitement": 'https://www.imdb.com/search/title/?count=100&genres=action&release_date=2019,2019&title_type=feature',
        "Disgust": 'http://www.imdb.com/search/title?genres=musical&title_type=feature&sort=moviemeter, asc',
        # Curated list (original note: family).
        "Anger": 'https://www.imdb.com/list/ls004108030/',
        "Fear": 'http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc',
        "Enjoyment": 'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc',
        # No emotion entered: top-rated movies chart.
        "": 'https://www.imdb.com/chart/top?ref_=nv_mv_250',
        "Trust": 'http://www.imdb.com/search/title?genres=western&title_type=feature&sort=moviemeter, asc',
        "Surprise": 'http://www.imdb.com/search/title?genres=film_noir&title_type=feature&sort=moviemeter, asc',
    }

    try:
        urlhere = urls_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        urlhere = None
    if urlhere is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    # Download the page and parse it.
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Keep only links that point at a title page.
    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
def mars_hemispheres(browser):
    """Collect the title and sample-image link of four Mars hemispheres.

    Visits the USGS astrogeology search results, clicks the ``<img>``
    thumbnails at indexes 4-7, and on each resulting page reads the ``href``
    of the link whose text is "Sample" and the text of the ``h2`` with class
    ``title``.

    Args:
        browser: A browser driver exposing ``visit``, ``find_by_tag``,
            ``html`` and ``back`` (presumably a splinter ``Browser`` --
            confirm against the caller).

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts, one per hemisphere.

    Raises:
        AttributeError: If a detail page lacks the "Sample" link or the
            title heading.
    """
    # References used by the original author:
    # https://stackoverflow.com/questions/46933679/scraping-text-in-h3-and-div-tags-using-beautifulsoup-python
    # https://www.dataquest.io/blog/web-scraping-beautifulsoup/
    url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
    browser.visit(url)

    hemisphere_images = []

    # Per the original author's note, the <img> tags at indexes 4-7 (not
    # 0-3) are the hemisphere thumbnails.
    for item in range(4, 8):
        # Re-query on every pass: the element list from the previous page
        # load is not reused after navigating away and back.
        image_thumbnail = browser.find_by_tag('img')
        image_thumbnail[item].click()

        # Parse the detail page that the click opened.
        hemis_soup = SOUP(browser.html, 'html.parser')

        hemis_items = hemis_soup.find('a', text='Sample').get('href')
        hemis_title = hemis_soup.find('h2', 'title').text

        hemisphere_images.append({"title": hemis_title, "url": hemis_items})

        # Return to the results page so the next thumbnail can be clicked.
        browser.back()

    return hemisphere_images
Пример #7
0
def getYoutubeLink(seacrhText):
    """Return an embed URL for the first YouTube search result, if any.

    Args:
        seacrhText: The search phrase; it is percent-encoded before being
            placed in the query string.

    Returns:
        A string built from ``https://www.youtube.com/embed/``, the video id
        and a fixed suffix, or ``None`` when the search page yields no
        ``yt-uix-tile-link`` elements or the first one is not a watch link.
    """
    # urllib.quote only exists on Python 2; Python 3 moved it to
    # urllib.parse. Import locally so the block works on either.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    query = quote(seacrhText)
    url = "https://www.youtube.com/results?search_query=" + query
    response = HTTP.get(url)
    soup = SOUP(response.text, "lxml")
    videos = soup.findAll(attrs={'class': 'yt-uix-tile-link'})

    # No results: report "no link" instead of failing on videos[0].
    if not videos:
        return None

    firstVideo = videos[0]
    if "watch" not in firstVideo['href']:
        return None

    # Drop the first 9 characters of the href (the length of '/watch?v=')
    # to keep only the video id.
    # NOTE(review): the suffix keeps the original '&amp;' and trailing
    # backslash verbatim -- confirm the consumer expects them.
    return "https://www.youtube.com/embed/" + firstVideo['href'][
        9:] + "?rel=0&amp;showinfo=0\\"
Пример #8
0
def main(emotion):
    """Fetch the IMDb genre page mapped to ``emotion`` and return its title links.

    Args:
        emotion: One of "Sad", "Disgust", "Anger", "Anticipation", "Fear",
            "Enjoyment", "Trust" or "Surprise" (exact spelling).

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.

    Raises:
        ValueError: If ``emotion`` is not a supported label (previously this
            surfaced as an UnboundLocalError).
    """
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Enjoyment": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        genre = None
    if genre is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    urlhere = url_template.format(genre)
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")
    return soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
Пример #9
0
def main(emotion):
    """Return the title links scraped from a fixed IMDb list page.

    NOTE(review): a per-emotion genre URL is computed first, but it is then
    unconditionally replaced by the fixed list URL, so ``emotion`` currently
    has no effect on the result. This looks like a debugging leftover --
    confirm the intent before relying on either behaviour.

    Args:
        emotion: An emotion label; compared against the known labels but
            otherwise unused (see the note above).

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.
    """
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    emotion_genres = (
        ("Sad", "drama"),
        ("Disgust", "musical"),
        ("Anger", "family"),
        ("Anticipation", "thriller"),
        ("Fear", "sport"),
        ("Enjoyment", "thriller"),
        ("Trust", "western"),
        ("Surprise", "film_noir"),
    )
    for label, genre in emotion_genres:
        if emotion == label:
            urlhere = url_template.format(genre)
            break

    # The fixed list URL always wins over the genre URL chosen above.
    urlhere = "https://www.imdb.com/list/ls009668314/"
    page = HTTP.get(urlhere)
    soup = SOUP(page.text, "lxml")

    # Debug output: dumps every anchor found on the page.
    print(soup.find_all("a"))

    # Keep only links that point at a title page.
    title_href = re.compile(r'\/title\/tt+\d*\/')
    return soup.find_all("a", attrs={"href": title_href})
def get_movie(emotion):
    """Scrape movie names and genres from the IMDb page mapped to ``emotion``.

    Args:
        emotion: One of "sad", "anger", "anticipation", "fear", "enjoyment",
            "enthusiastic", "surprise" or "happy" (lower case).

    Returns:
        A dict keyed by 0-based position; each value is a dict with keys
        ``'Name'`` and ``'Genre'`` taken from one ``lister-item-content``
        ``<div>``.

    Raises:
        ValueError: If ``emotion`` is not a supported label (previously this
            surfaced as an UnboundLocalError).
        AttributeError: If an item lacks a link or a ``genre`` span.
    """
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    urls_by_emotion = {
        "sad": url_template.format("drama"),
        "anger": url_template.format("family"),
        "anticipation": url_template.format("thriller"),
        "fear": url_template.format("sport"),
        "enjoyment": url_template.format("thriller"),
        "enthusiastic": 'https://www.imdb.com/search/title/?genres=comedy&title_type=feature&sort=moviemeter, asc',
        "surprise": url_template.format("film_noir"),
        "happy": 'https://www.imdb.com/list/ls060352216/,asc',
    }

    try:
        urlhere = urls_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        urlhere = None
    if urlhere is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    # Download the page and parse it.
    response = requests.get(urlhere)
    soup = SOUP(response.text, "html.parser")

    # Each lister-item-content <div> describes one movie.
    items = soup.find_all("div", attrs={"class": "lister-item-content"})
    details = {}
    for index, item in enumerate(items):
        details[index] = {
            'Name': item.a.contents[0],
            'Genre': item.find("span", {"class": "genre"}).contents[0],
        }

    return details
def main(emotion):
    """Fetch the IMDb genre page mapped to ``emotion`` and return its headers.

    Args:
        emotion: One of "sad", "disgust", "angry", "neutral", "fear",
            "happy" or "surprise". Matching ignores letter case, so the
            capitalised forms (including "Neutral", which the original
            duplicated comparison missed) are accepted.

    Returns:
        The result of ``soup.find_all`` for every ``<h3>`` with class
        ``lister-item-header``.

    Raises:
        ValueError: If ``emotion`` is not a supported label (previously this
            surfaced as an UnboundLocalError).
    """
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    genre_by_emotion = {
        "sad": "drama",
        "disgust": "musical",
        "angry": "family",
        "neutral": "thriller",
        "fear": "sport",
        "happy": "thriller",
        "surprise": "film_noir",
    }

    # Normalise case for anything string-like; other values fall through to
    # the "unsupported" error below.
    key = emotion.lower() if hasattr(emotion, "lower") else emotion
    try:
        genre = genre_by_emotion[key]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        genre = None
    if genre is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    # Download the page and parse it.
    response = HTTP.get(url_template.format(genre))
    soup = SOUP(response.text, "lxml")

    # One lister-item-header <h3> per listed movie.
    return soup.find_all('h3', {'class': 'lister-item-header'})
Пример #12
0
def main(emotion):
    """Fetch the IMDb genre page mapped to ``emotion`` and return its title links.

    The received ``emotion`` is printed before it is looked up.

    Args:
        emotion: One of "sadness", "disgust", "anger", "neutral", "happy" or
            "surprise" (lower case).

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.

    Raises:
        ValueError: If ``emotion`` is not a supported label (previously
            ``None`` was handed to ``HTTP.get`` as the URL).
    """
    print(emotion)

    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    genre_by_emotion = {
        "sadness": "drama",
        "disgust": "musical",
        "anger": "family",
        "neutral": "thriller",
        "happy": "thriller",
        "surprise": "film_noir",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        genre = None
    if genre is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    # Download the page and parse it.
    urlhere = url_template.format(genre)
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Keep only links that point at a title page.
    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
Пример #13
0
    def scrape(buttonTitle):
        """Download the page registered for ``buttonTitle`` and return its title links.

        Args:
            buttonTitle: Key into ``app_dict`` (defined outside this
                function); its value is used as the URL to fetch.

        Returns:
            The result of ``soup.find_all`` for every ``<a>`` whose ``href``
            matches ``/title/tt<digits>/``. These are still raw tags, not
            plain movie titles.

        Raises:
            KeyError: If ``buttonTitle`` is not a key of ``app_dict``
                (assuming ``app_dict`` is a plain dict -- confirm).
        """
        page = HTTP.get(app_dict[buttonTitle])
        parsed = SOUP(page.text, "lxml")
        return parsed.find_all(
            "a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
Пример #14
0
def main(emotion):
    """Fetch the IMDb genre page mapped to ``emotion`` and return its title links.

    +--------------+-----------+
    | Emotion      | Genre     |
    +--------------+-----------+
    | Sad          | drama     |
    | Disgust      | musical   |
    | Anger        | family    |
    | Anticipation | thriller  |
    | Fear         | sport     |
    | Joy          | thriller  |
    | Trust        | western   |
    | Surprise     | film_noir |
    +--------------+-----------+

    :param emotion: Name of the emotion, spelled exactly as in the table.
    :return: The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.
    :raises ValueError: If ``emotion`` is not in the table (previously this
        surfaced as an UnboundLocalError).
    """
    url_template = "http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc"
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Joy": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        genre = None
    if genre is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    linked_url = url_template.format(genre)
    response = HTTP.get(linked_url)
    soup = SOUP(response.text, "lxml")

    return soup.find_all("a",
                         attrs={"href": re.compile(r"\/title\/tt+\d*\/")})
Пример #15
0
def main(emotion):
    """Fetch the IMDb page mapped to ``emotion`` and return its title links.

    Args:
        emotion: One of "sad", "anger", "anticipation", "fear", "enjoyment",
            "enthusiastic", "surprise" or "happy" (lower case).

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.

    Raises:
        ValueError: If ``emotion`` is not a supported label (previously this
            surfaced as an UnboundLocalError).
    """
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    urls_by_emotion = {
        "sad": url_template.format("drama"),
        "anger": url_template.format("family"),
        "anticipation": url_template.format("thriller"),
        "fear": url_template.format("sport"),
        "enjoyment": url_template.format("thriller"),
        "enthusiastic": 'https://www.imdb.com/search/title/?genres=comedy&title_type=feature&sort=moviemeter, asc',
        "surprise": url_template.format("film_noir"),
        "happy": 'https://www.imdb.com/list/ls060352216/,asc',
    }

    try:
        urlhere = urls_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        urlhere = None
    if urlhere is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    # Download the page and parse it.
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Keep only links that point at a title page.
    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
Пример #16
0
def main(emotion):
    """Fetch the IMDb genre page mapped to ``emotion`` and return its title links.

    Args:
        emotion: Either 'sad' (drama) or 'surprise' (film_noir).

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.

    Raises:
        ValueError: If ``emotion`` is not a supported label (previously this
            surfaced as an UnboundLocalError).
    """
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter'
    genre_by_emotion = {
        'sad': 'drama',
        'surprise': 'film_noir',
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        genre = None
    if genre is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    urlhere = url_template.format(genre)
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")
    return soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
Пример #17
0
def main(emotion):
    """Fetch the IMDb genre page mapped to ``emotion`` and return its title links.

    Args:
        emotion: One of "Sad", "Disgust", "Anger", "Anticipation", "Fear",
            "Enjoyment", "Trust" or "Surprise" (exact spelling).

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.

    Raises:
        ValueError: If ``emotion`` is not a supported label (previously this
            surfaced as an UnboundLocalError).
    """
    # The query string uses plain '&' separators. The original URLs carried
    # the HTML entity '&amp;', which turned the parameter names into
    # 'amp;title_type' and 'amp;sort'.
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
        "Fear": "sport",
        "Enjoyment": "thriller",
        "Trust": "western",
        "Surprise": "film_noir",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        genre = None
    if genre is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    # Download the page and parse it.
    urlhere = url_template.format(genre)
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Keep only links that point at a title page.
    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
Пример #18
0
def main(genre):
    """Fetch the IMDb search page for ``genre`` and return its title links.

    Args:
        genre: Genre name; appended verbatim (no escaping) to the
            ``genres=`` query parameter.

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.
    """
    # Download the whole search page for the requested genre.
    page = HTTP.get("https://www.imdb.com/search/title/?genres=" + genre)

    # Parse the markup and keep only links that point at a title page.
    parsed = SOUP(page.text, "lxml")
    return parsed.find_all(
        "a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
Пример #19
0
def f5():
    """Print movie titles scraped from the IMDb sport-genre search page.

    Examines at most the first 13 ``<a>`` tags whose ``href`` matches
    ``/title/tt<digits>/`` and prints the text of each plain one, followed
    by two blank lines. Returns ``None``.
    """
    print("FEAR MOVIES HAIN")
    urlhere = 'http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc'
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")
    anchors = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})

    for index, anchor in enumerate(anchors):
        pieces = str(anchor).split('>')
        # Exactly three pieces means a plain '<a ...>text</a>' tag; the
        # middle piece is 'text</a', so drop its last three characters.
        if len(pieces) == 3:
            print(pieces[1][:-3])
        # Stop after the anchor at index 12 has been handled.
        if index > 11:
            break
    print()
    print()
Пример #20
0
def main(emotion):
    """Fetch the IMDb page mapped to a numeric emotion code and return its title links.

    Args:
        emotion: A code from 1 to 6. The codes map to these ``genres=``
            values: 1 family, 2 drama, 3 sport, 4 musical, 5 happy,
            6 comedy.

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.

    Raises:
        ValueError: If ``emotion`` equals none of the supported codes
            (previously this surfaced as an UnboundLocalError).
    """
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    urls_by_code = (
        (2, url_template.format("drama")),
        (4, url_template.format("musical")),
        (1, url_template.format("family")),
        # NOTE(review): "happy" is presumably not an IMDb genre, and the
        # original comment here mentioned thriller -- confirm intended URL.
        (5, 'https://www.imdb.com/search/title/?genres=happy&title_type=feature&sort=moviemeter, asc'),
        (3, url_template.format("sport")),
        (6, 'https://www.imdb.com/search/title/?genres=comedy&title_type=feature&sort=moviemeter, asc'),
    )

    # Codes are matched with == (not a dict lookup) so that values which
    # compare equal to the code but are not hashable still match, exactly
    # as they did in the original if/elif chain.
    for code, candidate in urls_by_code:
        if emotion == code:
            urlhere = candidate
            break
    else:
        raise ValueError("Unsupported emotion code: {!r}".format(emotion))

    # Download the page and parse it.
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    # Keep only links that point at a title page.
    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
Пример #21
0
def get_movie(emotion):
    """Fetch the IMDb genre page mapped to a mood and return its title links.

    Args:
        emotion: One of "neutral" (drama), "negative" (western) or
            "positive" (sport).

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.

    Raises:
        ValueError: If ``emotion`` is not a supported label (previously this
            surfaced as an UnboundLocalError).
    """
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    genre_by_emotion = {
        "neutral": "drama",
        "negative": "western",
        "positive": "sport",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        genre = None
    if genre is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    urlhere = url_template.format(genre)
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    return soup.find_all("a",
                         attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
Пример #22
0
def main(emotions):
    """Fetch the IMDb genre page mapped to ``emotions`` and return its title links.

    Args:
        emotions: One of "Sad" (drama), "Disgust" (musical), "Anger"
            (family) or "Anticipation" (thriller).

    Returns:
        The result of ``soup.find_all`` for every ``<a>`` whose ``href``
        matches ``/title/tt<digits>/``.

    Raises:
        ValueError: If ``emotions`` is not a supported label (previously
            this surfaced as an UnboundLocalError).
    """
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    genre_by_emotion = {
        "Sad": "drama",
        "Disgust": "musical",
        "Anger": "family",
        "Anticipation": "thriller",
    }

    try:
        genre = genre_by_emotion[emotions]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        genre = None
    if genre is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotions))

    urlhere = url_template.format(genre)
    response = HTTP.get(urlhere)
    soup = SOUP(response.text, "lxml")

    return soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
Пример #23
0
def scrapAndProcess(emotion):
    """Return the movie names scraped from the IMDb genre page for ``emotion``.

    Args:
        emotion: One of "sad", "disgust", "anger", "anticipation", "fear",
            "enjoyment", "trust" or "surprise" (lower case).

    Returns:
        A list of link texts for every ``<a>`` whose ``href`` matches
        ``/title/tt<digits>/``, skipping texts equal to "None", "X" or a
        lone newline. The list is empty for an unrecognised emotion. Any
        exception raised while fetching or parsing is printed, and whatever
        was collected so far is returned.
    """
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    emotion_genres = (
        ("sad", "drama"),
        ("disgust", "musical"),
        ("anger", "family"),
        ("anticipation", "thriller"),
        ("fear", "sport"),
        ("enjoyment", "thriller"),
        ("trust", "western"),
        ("surprise", "film_noir"),
    )

    url = ""
    for label, genre in emotion_genres:
        if emotion == label:
            url = url_template.format(genre)
            break

    movies = []
    # Unknown emotion: nothing to fetch.
    if not url:
        return movies

    try:
        page = HTTP.get(url)
        parsed = SOUP(page.text, "lxml")
        # Link texts that are placeholders rather than movie names.
        ignored = ["None", "X", "\n"]
        title_href = re.compile(r'\/title\/tt+\d*\/')
        for anchor in parsed.findAll('a', attrs={"href": title_href}):
            text = str(anchor.string)
            if text not in ignored:
                movies.append(text)
    except Exception as e:
        # Best effort: report the problem and return what we have.
        print(e)

    return movies
Пример #24
0
def target(emotion):
    """Fetch the IMDb genre page mapped to ``emotion`` and return its title links.

    Args:
        emotion: One of "disgust", "sad", "trust", "anger", "fear",
            "anticipation", "enjoyment" or "surprise" (lower case).

    Returns:
        The result of ``find_all`` for every ``<a>`` whose ``href`` matches
        ``/title/tt<digits>/``.

    Raises:
        ValueError: If ``emotion`` is not a supported label. The original
            condition ``emotion == "anticipation" or "enjoyment"`` was
            always true, so every unmatched value -- including "surprise" --
            was silently sent to the thriller page.
    """
    # The query string uses plain '&' separators. The original URLs carried
    # the HTML entity '&amp;', which turned the parameter names into
    # 'amp;title_type' and 'amp;sort'.
    url_template = 'http://www.imdb.com/search/title?genres={}&title_type=feature&sort=moviemeter, asc'
    genre_by_emotion = {
        "disgust": "musical",
        "sad": "drama",
        "trust": "western",
        "anger": "family",
        "fear": "sport",
        "anticipation": "thriller",
        "enjoyment": "thriller",
        "surprise": "film_noir",
    }

    try:
        genre = genre_by_emotion[emotion]
    except (KeyError, TypeError):
        # TypeError covers unhashable arguments such as lists.
        genre = None
    if genre is None:
        raise ValueError("Unsupported emotion: {!r}".format(emotion))

    url = url_template.format(genre)
    response = HTTP.get(url)
    field = SOUP(response.text, "lxml")

    # Keep only links that point at a title page.
    return field.find_all("a",
                          attrs={"href": regex.compile(r'\/title\/tt+\d*\/')})
Пример #25
0
def main(emotion):
    """Return the IMDb title links from the curated list for ``emotion``.

    Each supported emotion is tied to one IMDb list page.  The page is
    fetched with ``HTTP.get`` and parsed with ``SOUP`` (module-level aliases
    defined outside this block -- presumably ``requests`` and
    ``BeautifulSoup``).

    Args:
        emotion: Emotion name, matched case-insensitively: "sad", "disgust",
            "anger", "anticipation", "fear", "enjoyment", "trust" or
            "surprise".

    Returns:
        The ``find_all`` result: every ``<a>`` tag whose ``href`` matches
        the IMDb title-path pattern.  An empty list is returned, without
        any network request, for an unrecognised emotion (this used to
        fail with ``UnboundLocalError``).
    """
    list_url_by_emotion = {
        "sad": 'https://www.imdb.com/list/ls009576722/',
        "disgust": 'https://www.imdb.com/list/ls075745491/',
        "anger": 'https://www.imdb.com/list/ls000445157/',
        "anticipation": 'https://www.imdb.com/india/upcoming/',
        "fear": 'https://www.imdb.com/list/ls058201636/',
        "enjoyment": 'https://www.imdb.com/list/ls005597767/',
        "trust": 'https://www.imdb.com/list/ls051594496/',
        "surprise": 'https://www.imdb.com/list/ls008944391/',
    }

    key = emotion.lower() if isinstance(emotion, str) else None
    urlhere = list_url_by_emotion.get(key)
    if urlhere is None:
        return []

    response = HTTP.get(urlhere)
    data = response.text

    soup = SOUP(data, "html.parser")

    title = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return title
Пример #26
0
def recommend(emotion):
    """Return the IMDb title links for the genre paired with ``emotion``.

    The emotion selects an IMDb genre search page, which is fetched with
    ``HTTP.get`` and parsed with ``SOUP`` (module-level aliases defined
    outside this block -- presumably ``requests`` and ``BeautifulSoup``).

    Args:
        emotion: Emotion name, matched case-insensitively: "Sad", "Disgust",
            "Anger", "Anticipation", "Fear", "Enjoyment", "Trust" or
            "Surprise".

    Returns:
        The ``find_all`` result: every ``<a>`` tag whose ``href`` matches
        the IMDb title-path pattern.  An empty list is returned, without
        any network request, for an unrecognised emotion (this used to
        fail with ``UnboundLocalError``).
    """
    genre_by_emotion = {
        "sad": "drama",
        "disgust": "musical",
        "anger": "family",
        "anticipation": "thriller",
        "fear": "sport",
        "enjoyment": "thriller",
        "trust": "western",
        "surprise": "film_noir",
    }

    key = emotion.lower() if isinstance(emotion, str) else None
    genre = genre_by_emotion.get(key)
    if genre is None:
        return []

    urlhere = ('http://www.imdb.com/search/title?genres={}'
               '&title_type=feature&sort=moviemeter, asc').format(genre)

    # Fetch the search page and parse it into a tree
    response = HTTP.get(urlhere)
    info = response.text
    soup = SOUP(info, "lxml")
    name = soup.find_all("a", attrs={"href": re.compile(r'\/title\/tt+\d*\/')})
    return name
        def main(emotion):
            """Print and return IMDb suggestions for a detected emotion.

            The lower-cased emotion selects an IMDb genre search page:
            sad -> drama, disgust -> musical, angry -> family,
            neutral -> thriller, scared -> sport, happy -> thriller,
            surprised -> film_noir.  The page is fetched with ``HTTP.get``
            and parsed with ``SOUP`` (aliases defined outside this block --
            presumably ``requests`` and ``BeautifulSoup``).

            Up to ten title headers are printed, each followed by the
            rating block at the same position in the page.

            Args:
                emotion: Emotion label; compared case-insensitively.

            Returns:
                The ``find_all`` result for the ``h3.lister-item-header``
                elements, or an empty list (no request made) when the
                emotion is not recognised -- that case used to fail with
                ``UnboundLocalError``.
            """
            em = emotion.lower()
            genre_by_emotion = {
                "sad": "drama",
                "disgust": "musical",
                "angry": "family",
                "neutral": "thriller",
                "scared": "sport",
                "happy": "thriller",
                "surprised": "film_noir",
            }
            genre = genre_by_emotion.get(em)
            if genre is None:
                return []

            print("EMOTION DETECTED:", em)
            urlhere = ('http://www.imdb.com/search/title?genres={}'
                       '&title_type=feature&sort=moviemeter, asc').format(genre)

            # HTTP request to get the data of the whole page
            response = HTTP.get(urlhere)
            data = response.text

            # Parsing the data using BeautifulSoup
            soup = SOUP(data, "lxml")

            title1 = soup.find_all("h3", {"class": "lister-item-header"})
            rating = soup.find_all("div", {"class": "inline-block ratings-imdb-rating"})

            print("LIST OF APT MOVIES BASED ON USERS CURRENT EMOTION:")
            # Loop over what is actually there: the ten hard-coded index
            # lookups raised IndexError on pages with fewer than ten entries.
            # NOTE(review): titles and ratings are paired purely by position;
            # an entry without a rating block would shift the pairing.
            for header, score in zip(title1[:10], rating[:10]):
                print(header.text)
                print("rating=", score.text)
            return title1
Пример #28
0
def scrapAndProcess(emotion):
    """Collect movie names from the IMDb genre page tied to ``emotion``.

    Args:
        emotion: One of "sad", "disgust", "anger", "anticipation", "fear",
            "enjoyment", "trust" or "surprise".

    Returns:
        A list of strings, one per matching title link whose text is not a
        known noise value.  The list is empty when the emotion is not
        recognised, and holds whatever was gathered so far when fetching or
        parsing raises (the exception is printed, not propagated).
    """
    # (emotion, IMDb genre search URL) pairs, checked in order
    emotion_urls = (
        ("sad",
         'http://www.imdb.com/search/title?genres=drama&title_type=feature&sort=moviemeter, asc'),
        ("disgust",
         'http://www.imdb.com/search/title?genres=musical&title_type=feature&sort=moviemeter, asc'),
        ("anger",
         'http://www.imdb.com/search/title?genres=family&title_type=feature&sort=moviemeter, asc'),
        ("anticipation",
         'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'),
        ("fear",
         'http://www.imdb.com/search/title?genres=sport&title_type=feature&sort=moviemeter, asc'),
        ("enjoyment",
         'http://www.imdb.com/search/title?genres=thriller&title_type=feature&sort=moviemeter, asc'),
        ("trust",
         'http://www.imdb.com/search/title?genres=western&title_type=feature&sort=moviemeter, asc'),
        ("surprise",
         'http://www.imdb.com/search/title?genres=film_noir&title_type=feature&sort=moviemeter, asc'),
    )

    url = ""
    for mood, genre_url in emotion_urls:
        if emotion == mood:
            url = genre_url
            break

    movies = []

    # Unknown emotion: nothing to fetch
    if not url:
        return movies

    # Best effort: a failed request or parse must not abort the caller
    try:
        page = HTTP.get(url)
        soup = SOUP(page.text, "lxml")

        # Link texts that show up among the title anchors but are not names
        noise = ["None", "X", "\n"]

        for anchor in soup.findAll(
                'a', attrs={"href": re.compile(r'\/title\/tt+\d*\/')}):
            # NavigableString (or None) -> plain str
            name = str(anchor.string)
            if name in noise:
                continue
            movies.append(name)

    except Exception as e:
        print(e)

    return movies
Пример #29
0
def scrape_rt(RT, num):
    """Scrape a Rotten Tomatoes chart page and return its ``num`` top films.

    The chart at ``RT`` is read for each film's title, profile link, review
    count and score.  Scores are rescaled to a 0-10 range and weighted by a
    nested logarithm of the review count, the films are ordered by
    ``rank_movies`` (defined elsewhere in this file), and only the first
    ``num`` are kept.  Each kept film's profile page is then fetched for its
    synopsis, content rating and runtime.

    Args:
        RT: URL of the Rotten Tomatoes chart page.
        num: Number of ranked films to keep (and profile pages to fetch).

    Returns:
        A dict keyed by cleaned title.  When every piece is found on the
        profile page the value is
        ``[summary, rating_info, runtime_info, weighted_score]``; pieces
        are placed with positional ``insert`` calls, so a missing piece
        shifts the ones after it.
    """
    response = requests.get(RT)
    data = SOUP(response.text, 'lxml')
    RT_dict = {}
    title_lst = []
    rel_lst = []
    reviews_lst = []

    # One table row per film: title + profile link, and the review count
    for movie in data.find_all('tr'):
        title = movie.find("a", class_="unstyled articleLink")
        if title is not None:
            # Text after the opening tag, up to the " (year)" suffix
            cleanTitle = str(title).split('">')[1].split(" (")[0].strip(
                '\n').strip()
            RT_dict[cleanTitle] = []
            title_lst.append(cleanTitle)

            # Link to the movie profile.  The base already ends in "/", so
            # drop a leading slash from the href to avoid "//" in the path.
            rel_link = str(title).split('href="')[1].split('">\n')[0]
            link = "https://www.rottentomatoes.com/" + rel_link.lstrip("/")
            RT_dict[cleanTitle].append(link)

        num_reviews = movie.find('td', class_="right hidden-xs")
        if num_reviews is not None:
            num_reviews = int(
                str(num_reviews).split('">')[1].split('</')[0])

            # Kept for the score adjustment below
            reviews_lst.append(num_reviews)

    # Scores.  NOTE(review): the three sequences are paired purely by
    # position; a row lacking one of the pieces would shift the pairing.
    for review, title, movie in zip(
            reviews_lst, title_lst,
            data.find_all('span', class_='tMeterIcon tiny')):
        rating = movie.find('span', class_="tMeterScore")
        rating = str(rating).split('">\xa0')[1].split('%</')[0]
        # Percentage -> 0-10 scale, to match the IMDb ratings
        weightedRating = int(rating) / 10

        # Weight by review count.
        # NOTE(review): if ``log`` is ``math.log`` this raises ValueError
        # for review <= 1 and goes negative for review < 4 -- confirm the
        # chart never lists such films.
        weightedRating = weightedRating * log(log(review, 4), 5)
        weightedRating = round(weightedRating, 1)
        RT_dict[title].append(weightedRating)

    # Rank first and keep only ``num`` films, so that profile pages are
    # fetched for the top-ranked films only.
    ranked_dict = rank_movies(RT_dict)
    ranked_dict = dict(list(ranked_dict.items())[0:num])
    for value in ranked_dict.values():
        # Move the profile link out of the result and into rel_lst
        rel_lst.append(value.pop(0))

    new_title_lst = list(ranked_dict.keys())

    # Synopsis, content rating and runtime live on the profile page
    for title, link in zip(new_title_lst, rel_lst):
        response = requests.get(link)
        data_1 = SOUP(response.text, 'lxml')

        for div_tag in data_1.find_all(
                'div', {'class': 'movie_synopsis clamp clamp-6 js-clamp'}):
            summary = str(div_tag.text).replace("\n", "")
            ranked_dict[title].insert(0, summary)

        for div_tag in data_1.find_all('li', {'class': 'meta-row clearfix'}):
            label_tag = div_tag.find('div', {'class': 'meta-label subtle'})
            if label_tag is None:
                # Row without a label: nothing to classify
                continue
            movie_label = label_tag.text
            if movie_label not in ("Rating:", "Runtime:"):
                continue

            value_tag = div_tag.find('div', {'class': 'meta-value'})
            if value_tag is None:
                continue
            meta_value = value_tag.text.replace("\n", "").replace(" ", "")

            if movie_label == "Rating:":
                ranked_dict[title].insert(1, meta_value)
            else:
                ranked_dict[title].insert(2, meta_value)

    return ranked_dict
Пример #30
0
def scrape_IMDB(IMDB, num, folder_path=None):
    """Scrape an IMDb genre listing and return its ``num`` top films.

    For every ``div.lister-item-content`` on the page the title, summary,
    certificate and runtime are collected; a rating weighted by a nested
    logarithm of the vote count is appended afterwards.  The films are then
    ordered by ``rank_movies`` (defined elsewhere in this file) and only
    the first ``num`` are kept.

    Args:
        IMDB: URL of the IMDb listing page.
        num: Number of ranked films to keep.
        folder_path: Not used by this function; kept so existing callers
            that pass it keep working.

    Returns:
        A dict mapping title to
        ``[summary, grading, runtime, weighted_rating]``, where a missing
        summary, certificate or runtime is recorded as ``"Not Found"``.
    """
    response = requests.get(IMDB)
    data = SOUP(response.text, 'lxml')

    IMDB_dict = {}
    title_lst = []
    num_reviews = []

    title_href = re.compile(r'\/title\/tt+\d*\/')

    for movie in data.find_all('div', class_="lister-item-content"):
        # Title: text of the first link that points at a title page
        title = movie.find("a", attrs={"href": title_href})
        title = str(title).split('">')[1].split('</')[0]
        IMDB_dict[title] = []
        title_lst.append(title)

        # Summary: the second "text-muted" paragraph of the entry.
        # find_all always returns a list (never None), so presence is
        # checked on the split result instead of indexing blindly.
        summary_parts = str(movie.find_all('p', {'class': 'text-muted'})).split(
            ', <p class="text-muted">')
        if len(summary_parts) > 1:
            summary = summary_parts[1].replace("\n", "").replace("</p>]", "")
        else:
            summary = "Not Found"
        IMDB_dict[title].append(summary)

        # Certificate
        grading = movie.find('span', class_="certificate")
        if grading is not None:
            grading = str(grading).split('">')[1].split('</')[0]
        else:
            grading = "Not Found"
        IMDB_dict[title].append(grading)

        # Runtime
        length = movie.find('span', class_="runtime")
        if length is not None:
            length = str(length).split('">')[1].split('</')[0]
        else:
            length = "Not Found"
        IMDB_dict[title].append(length)

    # Vote counts, limited to as many entries as there are titles.
    # NOTE(review): every digit in the paragraph text is concatenated, so
    # any other number in the same paragraph would inflate the count.
    for _, votes_tag in zip(title_lst,
                            data.find_all('p',
                                          class_="sort-num_votes-visible")):
        numRater = int(re.sub("[^0-9]", "", votes_tag.text))
        num_reviews.append(numRater)

    # Ratings.  NOTE(review): counts, titles and rating bars are paired
    # purely by position; an entry lacking one of them shifts the pairing.
    for review, title, movie in zip(num_reviews, title_lst,
                                    data.find_all('div', class_="ratings-bar")):
        rating = movie.find('div', class_="inline-block ratings-imdb-rating")
        # Fourth space-separated token of the serialized tag holds the value
        value_token = str(rating).split(' ')[3]
        try:
            rating = float(re.search(r'[\d]*[.][\d]+', value_token).group())
        except AttributeError:
            # No decimal part: fall back to a whole number
            rating = float(re.search(r'\d+', value_token).group())

        # Weight by vote count.
        # NOTE(review): if ``log`` is ``math.log`` this raises ValueError
        # for review <= 1 -- confirm listings always carry more votes.
        weightedRating = rating * log(log(review, 5), 10)
        weightedRating = round(weightedRating, 1)

        IMDB_dict[title].append(weightedRating)

    ranked_dict = rank_movies(IMDB_dict)
    ranked_dict = dict(list(ranked_dict.items())[0:num])

    return ranked_dict