from web1 import scrape_top_list
from web4 import get_movie_list_details


def movie_detail_list():
    """Scrape the detail page of every movie in the top list and return them all."""
    details = []
    top_movies = scrape_top_list()
    for movie in top_movies:
        link = movie["url"]
        details.append(get_movie_list_details(link))
    return details
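# Example usage (a sketch only; assumes this module is run directly and that the
# scrapers in web1/web4 are importable from the working directory):
if __name__ == "__main__":
    from pprint import pprint
    pprint(movie_detail_list()[:2])   # peek at the first two scraped movies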
import os
import json

from web1 import scrape_top_list
# actores() -- the full-credits cast scraper -- is defined/imported elsewhere in this repo.


def cach():
    """Add the full cast to each movie's cached JSON file, scraping it only once."""
    movies = scrape_top_list()
    all_data = []
    for movie in movies:
        imdb_id = movie["Url"][-9:]
        j_url = imdb_id + ".json"
        with open(j_url, "r") as file:
            data = json.load(file)
        # Only hit the credits page when the cast has not been cached yet.
        if "cast" not in data:
            print("caching cast for", imdb_id)
            credits_url = "https://www.imdb.com/title/" + imdb_id + "/fullcredits?ref_=tt_cl_sm#cast"
            data["cast"] = actores(credits_url)
            with open(j_url, "w") as file:
                file.write(json.dumps(data))
        all_data.append(data)
    return all_data
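# The body of actores() is not shown in this file. Below is only a minimal sketch of
# what such a full-credits scraper could look like, assuming requests and BeautifulSoup
# are available; the CSS selectors are assumptions about the IMDb markup and may need
# adjusting. This is not the repo's actual implementation.
import requests
from bs4 import BeautifulSoup


def scrape_full_cast(credits_url):
    response = requests.get(credits_url)
    soup = BeautifulSoup(response.text, "html.parser")
    cast = []
    # Assumed layout: each cast-table row links the actor's name in the cell
    # next to the photo cell.
    for link in soup.select("table.cast_list td.primary_photo + td a"):
        cast.append({"name": link.get_text(strip=True)})
    return cast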
import os
import json

from web1 import scrape_top_list


def analyse_movies_genre():
    """Count how many of the cached top movies carry each genre."""
    movies = scrape_top_list()
    genre_counts = {}
    first_genres = []
    all_data = []
    for movie in movies:
        path = movie['url'][-9:-1] + ".json"
        if os.path.exists(path):
            with open(path, 'r') as file:
                data = json.load(file)
            first_genres.append(data['Genres'][0])
            all_data.append(data)
    # Collect the unique genres seen as a movie's first genre, then count every
    # cached movie whose genre list contains each of them.
    unique_genres = []
    for genre in first_genres:
        if genre not in unique_genres:
            unique_genres.append(genre)
    for genre in unique_genres:
        print(genre)
        count = 0
        for data in all_data:
            if genre in data['Genres']:
                count += 1
        genre_counts[genre] = count
    return genre_counts
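# Example usage (assumes the per-movie JSON caches already exist on disk); the result
# is a dict mapping each genre to the number of cached top movies tagged with it.
if __name__ == "__main__":
    from pprint import pprint
    pprint(analyse_movies_genre())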
from web1 import scrape_top_list
from web4 import get_movie_list_details
import os
import json
from pprint import pprint
# import time
# import random

# Count how many of the cached top movies each cast member appears in.
movies = scrape_top_list()
actor_counts = {}
for movie in movies:
    path = movie['url'][-9:-1] + ".json"
    with open(path, 'r') as file:
        data = json.load(file)
    for member in data['cast']:
        if member['name'] not in actor_counts:
            actor_counts[member['name']] = 1
        else:
            actor_counts[member['name']] += 1
pprint(actor_counts)
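# The same tally reads more compactly with collections.Counter; this is only an
# equivalent sketch of the loop above (reusing the imports already in this file),
# not a required change.
from collections import Counter

counts = Counter()
for movie in scrape_top_list():
    with open(movie['url'][-9:-1] + ".json", 'r') as file:
        cached = json.load(file)
    counts.update(member['name'] for member in cached['cast'])
pprint(dict(counts))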
import os
import json
import time
import random

from web1 import scrape_top_list
from web4 import scrape_movie_details

# Scrape each top movie's detail page and cache it as <imdb-id>.json, skipping
# movies that are already cached.
movies = scrape_top_list()
for movie in movies:
    # time.sleep(random.randint(5, 10))  # optional polite delay between requests
    path = movie["Url"][-9:] + ".json"
    if not os.path.exists(path):
        details = scrape_movie_details(movie["Url"])
        with open(path, "w") as file:
            file.write(json.dumps(details))
        print("was not cached, scraped and saved")   # original message: "nahi thi"
    else:
        print("already cached")                      # original message: "hai"
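# Quick sanity check (a sketch): read one cached file back to confirm the JSON
# round-trips; which tt-id this hits depends on whatever the top list returns first.
first = scrape_top_list()[0]
cached_path = first["Url"][-9:] + ".json"
if os.path.exists(cached_path):
    with open(cached_path, "r") as file:
        print(sorted(json.load(file).keys()))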