def downloadUltimateMovieRankingsYearlyData(self, year, outdir, debug=False):
    """Download the Ultimate Movie Rankings page for one year unless already saved.

    The target file is ``<year>.p`` inside ``outdir`` (path built by
    ``setFile``). If ``isFile`` reports that it already exists, nothing is
    downloaded. A failed download is treated as best-effort: it is reported
    when ``debug`` is set and otherwise ignored.

    Args:
        year: Year to fetch; formatted into the URL and the file name.
        outdir: Directory passed to ``setFile`` to build the save path.
        debug: When true, print progress and failure messages.

    Returns:
        None.
    """
    # Alternative URL patterns that the original code assigned and then
    # immediately overwrote; kept for reference only (presumably different
    # years live under different slugs -- TODO confirm):
    #   https://www.ultimatemovierankings.com/{0}-top-box-office-movies/
    #   https://www.ultimatemovierankings.com/top-grossing-movies-of-{0}/
    #   https://www.ultimatemovierankings.com/{0}-movies/
    #   https://www.ultimatemovierankings.com/{0}-top-grossing-movies/
    #   https://www.ultimatemovierankings.com/biggest-box-office-hits-of-{0}/
    #   https://www.ultimatemovierankings.com/top-grossing-{0}-movies/
    #   https://www.ultimatemovierankings.com/ranking-{0}-movies/
    url = "https://www.ultimatemovierankings.com/best-worst-movies-{0}/".format(year)

    savename = setFile(outdir, str(year) + ".p")
    if isFile(savename):
        return

    if debug:
        print("Downloading/Saving {0}".format(savename))

    try:
        getWebData(base=url, savename=savename, useSafari=False)
    except Exception as err:
        # Best-effort download: keep going, but no longer swallow
        # KeyboardInterrupt/SystemExit the way a bare ``except:`` did.
        if debug:
            print("Could not download {0}: {1}".format(url, err))
        sleep(0.2)
    else:
        # Longer pause after a successful request.
        sleep(2)
def downloadRottenTomatoesYearlyData(self, year, outdir, debug=False):
    """Fetch the Rotten Tomatoes "best of" listing for ``year`` if not yet saved.

    Args:
        year: Year appended to the ``?year=`` query string and used for the
            ``<year>.p`` file name.
        outdir: Directory passed to ``setFile`` to build the save path.
        debug: When true, print the path being downloaded/saved.

    Returns:
        None. Returns early, without downloading, when the save file exists.
    """
    target = setFile(outdir, "{0}.p".format(year))
    if isFile(target):
        return

    if debug:
        print("Downloading/Saving {0}".format(target))

    getWebData(
        base="https://www.rottentomatoes.com/top/bestofrt/?year=" + str(year),
        savename=target,
        useSafari=False,
    )
def downloadWikiFilmYearlyData(self, year, outdir, debug=False):
    """Fetch the Wikipedia "<year>_in_film" page if it has not been saved yet.

    Args:
        year: Year formatted into the page URL and the ``<year>.p`` file name.
        outdir: Directory passed to ``setFile`` to build the save path.
        debug: When true, print the URL being downloaded.

    Returns:
        None. Returns early, without downloading, when the save file exists.
    """
    target = setFile(outdir, str(year) + ".p")
    if isFile(target):
        return

    page = "https://en.wikipedia.org/wiki/{0}_in_film".format(year)
    if debug:
        print("Downloading {0}".format(page))

    getWebData(base=page, savename=target, useSafari=False)
    # Short pause after each request.
    sleep(1)
def downloadSAGCategoryData(self, category, outdir, debug=False):
    """Fetch the Wikipedia page of one Screen Actors Guild Award category.

    Args:
        category: Category slug appended to
            ``Screen_Actors_Guild_Award_for_`` in the URL; also used as the
            base name of the ``<category>.p`` save file.
        outdir: Directory passed to ``setFile`` to build the save path.
        debug: When true, print the URL being downloaded.

    Returns:
        None. Returns early, without downloading, when the save file exists.
    """
    page = "https://en.wikipedia.org/wiki/Screen_Actors_Guild_Award_for_{0}".format(category)
    target = setFile(outdir, category + ".p")

    if not isFile(target):
        if debug:
            print("Downloading {0}".format(page))
        getWebData(base=page, savename=target, useSafari=False)
        # Short pause after each request.
        sleep(1)
def downloadRottenTomatoesTop100Data(self, genre, outdir, debug=False):
    """Fetch the Rotten Tomatoes top-100 list for ``genre`` if not yet saved.

    Args:
        genre: Genre slug placed between ``top_100_`` and ``_movies/`` in the
            URL; also the base name of the ``<genre>.p`` save file.
        outdir: Accepted but not used -- see the note below.
        debug: When true, print the path being downloaded/saved.

    Returns:
        None. Returns early, without downloading, when the save file exists.
    """
    # NOTE(review): the ``outdir`` argument is overwritten here, so the
    # caller's value never takes effect; output always goes to the directory
    # derived from ``self.getDataDir()``. Confirm whether that is intended.
    outdir = setDir(self.getDataDir())
    if not isDir(outdir):
        mkDir(outdir)

    site = "https://www.rottentomatoes.com"
    path = "/top/bestofrt/top_100_" + genre + "_movies/"
    target = setFile(outdir, genre + ".p")
    if isFile(target):
        return

    if debug:
        print("Downloading/Saving {0}".format(target))

    getWebData(base=site + path, savename=target, useSafari=False, dtime=10)
    # Pause after each request.
    sleep(2)
def downloadFilms101YearlyData(self, year, outdir, debug=False):
    """Download the films101.com yearly page for ``year`` unless already saved.

    The target file is ``<year>.p`` inside ``outdir`` (path built by
    ``setFile``). A failed download is treated as best-effort: it is reported
    when ``debug`` is set and the method returns without pausing.

    Args:
        year: Year formatted into the URL and the file name.
        outdir: Directory passed to ``setFile`` to build the save path.
        debug: When true, print progress and failure messages.

    Returns:
        None.
    """
    url = "http://www.films101.com/y{0}r.htm".format(year)
    savename = setFile(outdir, "{0}.p".format(year))
    if isFile(savename):
        return

    if debug:
        print("Downloading/Saving {0}".format(savename))

    # Only the download itself is guarded. ``Exception`` (rather than a bare
    # ``except:``) lets KeyboardInterrupt/SystemExit propagate.
    try:
        getWebData(base=url, savename=savename, useSafari=False)
    except Exception as err:
        if debug:
            print("Could not download {0}: {1}".format(url, err))
        return

    # Pause after a successful request.
    sleep(2)
def downloadTeamStandingsByYear(self, year, debug=False):
    """Fetch the college-football standings page for one season if not saved.

    The URL is built by ``join`` from ``self.getBase()``, the standings
    sub-path and the year; the result is saved as ``<year>.p`` in
    ``self.getSeasonDir()``.

    Args:
        year: Season year; used in both the URL and the file name.
        debug: When true, print the URL being downloaded.

    Returns:
        None. Returns early, without downloading, when the save file exists.
    """
    season = str(year)
    # NOTE(review): ``join`` looks like a path join applied to URL parts --
    # confirm it always yields forward slashes on the platforms in use.
    url = join(self.getBase(), "college-football/standings/_/season", season)

    target = setFile(self.getSeasonDir(), season + ".p")
    if isFile(target):
        return

    if debug:
        print("Downloading {0}".format(url))

    getWebData(base=url, savename=target, useSafari=False)
    # Pause 10 seconds plus a random extra between requests.
    sleep(10 + 2 * random())
def downloadTeamStatisticsDataByYear(self, idval, name, year, debug=False):
    """Fetch one team's season statistics page if it has not been saved yet.

    The URL is built by ``join`` from ``self.getBase()``, a sub-path
    containing ``idval`` and the year; the result is saved as
    ``<name>-<year>.p`` in ``self.getYearlyStatisticsDir(year)``.

    Args:
        idval: Team identifier formatted into the URL.
        name: Team name used in the save file name.
        year: Season year; used in the URL, the output directory lookup and
            the file name.
        debug: When true, print the URL and the save path.

    Returns:
        None. Returns early, without downloading, when the save file exists.
    """
    stats_path = "college-football/team/stats/_/id/{0}/season".format(idval)
    # NOTE(review): ``join`` looks like a path join applied to URL parts --
    # confirm it always yields forward slashes on the platforms in use.
    url = join(self.getBase(), stats_path, str(year))

    target = setFile(self.getYearlyStatisticsDir(year), "{0}-{1}.p".format(name, year))
    if isFile(target):
        return

    if debug:
        print("Downloading {0} to {1}".format(url, target))

    getWebData(base=url, savename=target, useSafari=False)
    # Pause 15 seconds plus a random extra between requests.
    sleep(15 + 2 * random())
def downloadBoxOfficeMojoWeekendData(self, year, week, outdir, debug=False):
    """Fetch the Box Office Mojo weekend chart for one week if not saved yet.

    The week is rendered with a leading ``"0"`` when it is below 10, and the
    page is saved as ``<year>-<week>.p`` in ``outdir``.

    Args:
        year: Year placed in the ``yr`` query parameter and the file name.
        week: Weekend number; compared against 10 to decide on zero padding.
        outdir: Directory passed to ``setFile`` to build the save path.
        debug: When true, print the path being downloaded/saved.

    Returns:
        None. Returns early, without downloading, when the save file exists.
    """
    year_text = str(year)
    week_text = "0" + str(week) if week < 10 else str(week)

    url = "".join((
        "http://www.boxofficemojo.com/weekend/chart/?yr=",
        year_text,
        "&wknd=",
        week_text,
        "&p=.htm",
    ))
    target = setFile(outdir, year_text + "-" + week_text + ".p")

    if not isFile(target):
        if debug:
            print("Downloading/Saving {0}".format(target))
        getWebData(base=url, savename=target, useSafari=False)
        # Pause after each request.
        sleep(2)
def downloadGameDataByID(self, gameID, year, test=False, debug=False):
    """Fetch the ESPN play-by-play page for one game.

    A previously saved file whose size rounds to 0 kB is removed first. In
    ``test`` mode the method only prints what it would download.

    Args:
        gameID: ESPN game identifier; used in the URL and as the base name of
            the ``<gameID>.p`` save file.
        year: Season year used to look up the games directory.
        test: When true, print the URL and save path and return without
            downloading.
        debug: Not read by this method (the helper calls pass ``debug=True``
            unconditionally).

    Returns:
        None.
    """
    url = "http://www.espn.com/college-football/playbyplay?gameId={0}".format(gameID)
    target = setFile(self.getYearlyGamesDir(year), "{0}.p".format(gameID))

    if isFile(target):
        from os.path import getsize

        # Remove a file that is effectively empty before fetching again.
        if round(getsize(target) / 1e3) < 1:
            removeFile(target, debug=True)
        # NOTE(review): an existing file that is not removed here is neither
        # skipped nor deleted -- the download call below still runs. Confirm
        # whether getWebData handles an already-present file.

    if test:
        print("Downloading {0} to {1}".format(url, target))
    else:
        getWebData(base=url, savename=target, dtime=6, useSafari=True, debug=True)
        # Pause after each request.
        sleep(6)
def downloadWikipediaYearlyData(self, year, outdir, debug=False):
    """Download the Wikipedia "Nth Academy Awards" page that matches ``year``.

    The ceremony number is ``int(year) - 1927`` and is rendered as an English
    ordinal (``1st``, ``2nd``, ``3rd``, ``4th``, ``11th``, ``21st`` ...). The
    page is saved as ``<year>.p`` in ``outdir``; nothing is downloaded when
    that file already exists.

    Args:
        year: Year (int or numeric string) used to derive the ceremony number
            and the file name.
        outdir: Directory passed to ``setFile`` to build the save path.
        debug: When true, print the URL being downloaded.

    Returns:
        None. Also returns early, after printing a message, when ``year``
        cannot be converted to an integer.
    """
    base = "https://en.wikipedia.org/wiki/"

    # The original guard wrapped only a string concatenation that cannot
    # fail; the conversion below is the step that can actually go wrong.
    try:
        ceremony = int(year) - 1927
    except (TypeError, ValueError):
        print("Could not create url for", year)
        return

    # English ordinal suffix: 11-13 always take "th"; otherwise the last
    # digit decides. ``abs`` keeps the result the same as the original
    # string-replacement chain for non-positive numbers.
    magnitude = abs(ceremony)
    if 11 <= magnitude % 100 <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(magnitude % 10, "th")

    url = base + str(ceremony) + suffix + "_Academy_Awards"

    savename = setFile(outdir, str(year) + ".p")
    if isFile(savename):
        return

    if debug:
        print("Downloading {0}".format(url))

    getWebData(base=url, savename=savename, useSafari=False)
    # Short pause after each request.
    sleep(1)