def save_data():
    """Cache the details of every top-listed movie as JSON under screpingdata/.

    For each movie in the scraped top list: if screpingdata/<id>.json already
    exists its contents are loaded; otherwise the details are scraped (with a
    short random delay between requests) and written to that file.

    Returns:
        list: the detail dict of every movie, cached or freshly scraped.

    Bug fixed: the original returned inside the first loop iteration, so at
    most one movie was ever processed — and the return value was sometimes a
    dict (cached case) and sometimes the int byte count from file.write().
    """
    movies_data = scrape_top_list()
    all_movies = []
    for one_movie in movies_data:
        # The movie id is the 9-char token embedded near the end of the URL,
        # e.g. ".../title/tt0111161/" -> "tt0111161".
        id_movie = one_movie['urls'][-10:-1]
        path = os.path.join("screpingdata", str(id_movie) + ".json")
        if os.path.exists(path):
            with open(path, "r") as file:
                all_movies.append(json.load(file))
        else:
            # task_no. 9 — polite random delay so we don't hammer the server
            time.sleep(random.randint(1, 3))
            screpe_movie_data = scrape_movie_details(one_movie["urls"])
            with open(path, "w") as file:
                json.dump(screpe_movie_data, file, indent=4, sort_keys=True)
            all_movies.append(screpe_movie_data)
    return all_movies
def movie_detail():
    """Scrape and return the detail dict of every movie in the top list.

    Returns:
        list: one detail dict (from scrape_movie_details) per top-list URL.
    """
    urls = [entry["urls"] for entry in scrape_top_list()]
    return [scrape_movie_details(movie_url) for movie_url in urls]
def load_movies_data():
    """Load the cached JSON detail file of every top-listed movie that has one.

    Movies without a screpingdata/<id>.json cache file are silently skipped.

    Returns:
        list: the parsed detail dict of each cached movie.
    """
    movies_list = scrape_top_list()
    all_movies_data = []
    # cwd is loop-invariant — compute it once instead of per movie.
    cache_dir = os.path.join(os.getcwd(), "screpingdata")
    for movie_data in movies_list:
        # The movie id is the 9-char token embedded near the end of the URL.
        ids = movie_data['urls'][-10:-1]
        path = os.path.join(cache_dir, str(ids) + ".json")
        if os.path.exists(path):
            # Opened read-only: the original used "r+" but never writes.
            with open(path, "r") as file:
                all_movies_data.append(json.load(file))
    return all_movies_data
from pprint import pprint
from IMDB_task1 import scrape_top_list
import os, json

# NOTE(review): module-level call — scraping runs as a side effect of import;
# consider guarding behind `if __name__ == "__main__":`.
movies_list = scrape_top_list()


# In this task we build a dictionary describing each director's languages:
# in how many languages they made movies, and how many times.
def analyse_language_and_directors(movies_list):
    """Gather cached movie data and the distinct languages / directors.

    Reads screpingdata/<id>.json for each movie in `movies_list` (movies
    without a cache file are skipped), then builds deduplicated lists of
    every "Language" and every "Director" value seen.

    NOTE(review): `final_dic` is created but never filled and there is no
    return statement in this view — the function currently returns None.
    Presumably the counting logic was meant to follow; confirm against the
    full file.
    """
    all_movies_data = []
    final_dic = {}
    # Check whether each movie's cache file exists before opening it.
    for movie_data in movies_list:
        # Movie id is the 9-char token embedded near the end of the URL.
        ids = (movie_data['urls'][-10:-1])
        exists = os.path.exists("screpingdata/" + str(ids) + ".json")
        cwd = os.getcwd()
        if exists:
            # "r+" mode although the file is only read here.
            with open(cwd + "/screpingdata/" + str(ids) + ".json", "r+") as file:
                data = json.load(file)
                all_movies_data.append(data)
    language_list = []
    director_list = []
    # Collect each language and each director name exactly once.
    for movis in all_movies_data:
        for language in movis["Language"]:
            if language not in language_list:
                language_list.append(language)
        for director in movis["Director"]:
            if director not in director_list:
                director_list.append(director)
# this task for take cast data of the all data of the movie from bs4 import BeautifulSoup import requests,json from pprint import pprint from IMDB_task1 import scrape_top_list all_movies_data = scrape_top_list() for data in all_movies_data[:5] : url = data["urls"] # task13 def scrape_movie_cast(url): movie_url = requests.get(url) movie_text = movie_url.text soup = (BeautifulSoup(movie_text,"html.parser")) article = soup.find('div', attrs ={ "class" : "article" , 'id':'titleCast'}) see_more = (article.find('div', attrs ={ "class" : "see-more"})) cast_url = ( url + (see_more).a["href"]) cast = requests.get(cast_url) cast_soup = (BeautifulSoup(cast.text,"html.parser")) fulcredits = cast_soup.find('div', attrs ={ "class" : "header" , 'id':'fullcredits_content'}) table = fulcredits.find("table",class_= "cast_list") tr = (table.find_all("tr")) tr.pop(0) movie_cast = [] for one_tr in tr : all_movies_cast = {} td = one_tr.find_all("td") if len(td) > 1: table_data = (td[1]) all_movies_cast["imdb_id"] = ((table_data.a["href"])[6:-1]) all_movies_cast["name"] = ((table_data.text).strip())