Пример #1
0
    def downloadUltimateMovieRankingsYearlyData(self,
                                                year,
                                                outdir,
                                                debug=False):
        """Download the Ultimate Movie Rankings page for ``year`` into ``outdir``.

        The page is stored as ``<year>.p`` (path built by ``setFile``). Nothing
        is downloaded when that file already exists. A failed download is
        tolerated (best effort): the method pauses briefly and returns.

        Args:
            year: Year to fetch; interpolated into the URL and the file name.
            outdir: Directory handed to ``setFile`` for the output file.
            debug: If true, print the target file name before downloading and
                the error when the download fails.
        """
        yname = str(year)

        # NOTE(review): this method used to assign seven other URL patterns
        # ("{year}-top-box-office-movies", "top-grossing-movies-of-{year}",
        # "{year}-movies", "{year}-top-grossing-movies",
        # "biggest-box-office-hits-of-{year}", "top-grossing-{year}-movies",
        # "ranking-{year}-movies") that were all overwritten before use.
        # Only the pattern below was ever requested. Presumably the site uses
        # different slugs for different years -- confirm before relying on it.
        url = "https://www.ultimatemovierankings.com/best-worst-movies-{0}/".format(
            year)

        savename = setFile(outdir, yname + ".p")
        if isFile(savename):
            return
        if debug:
            print("Downloading/Saving {0}".format(savename))
        try:
            getWebData(base=url, savename=savename, useSafari=False)
            sleep(2)
        except Exception as err:
            # Catch Exception rather than everything so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop a download loop.
            if debug:
                print("Could not download {0}: {1}".format(url, err))
            sleep(0.2)
Пример #2
0
 def downloadRottenTomatoesYearlyData(self, year, outdir, debug=False):
     """Fetch the Rotten Tomatoes "best of" page for one year.

     The result is written to ``<year>.p`` inside ``outdir``; if that file
     is already present the call is a no-op.

     Args:
         year: Year appended to the ``?year=`` query string.
         outdir: Directory handed to ``setFile`` for the output file.
         debug: If true, print the target file name before downloading.
     """
     page = "https://www.rottentomatoes.com/top/bestofrt/?year=" + str(year)
     target = setFile(outdir, "{0}.p".format(year))

     # Skip work that has already been done.
     if isFile(target):
         return

     if debug:
         print("Downloading/Saving {0}".format(target))
     getWebData(base=page, savename=target, useSafari=False)
Пример #3
0
 def downloadWikiFilmYearlyData(self, year, outdir, debug=False):
     """Fetch the English Wikipedia "<year>_in_film" article.

     The result is written to ``<year>.p`` inside ``outdir``; if that file
     is already present the call is a no-op. A one-second pause follows
     every actual download.

     Args:
         year: Year interpolated into the article name and the file name.
         outdir: Directory handed to ``setFile`` for the output file.
         debug: If true, print the URL before downloading.
     """
     target = setFile(outdir, str(year) + ".p")
     article = "https://en.wikipedia.org/wiki/{0}_in_film".format(year)

     if not isFile(target):
         if debug:
             print("Downloading {0}".format(article))
         getWebData(base=article, savename=target, useSafari=False)
         sleep(1)
Пример #4
0
    def downloadSAGCategoryData(self, category, outdir, debug=False):
        """Fetch the Wikipedia article for one Screen Actors Guild Award category.

        The result is written to ``<category>.p`` inside ``outdir``; if that
        file is already present the call is a no-op. A one-second pause
        follows every actual download.

        Args:
            category: Category string appended to the article name
                ``Screen_Actors_Guild_Award_for_`` and used as the file stem.
            outdir: Directory handed to ``setFile`` for the output file.
            debug: If true, print the URL before downloading.
        """
        target = setFile(outdir, category+".p")
        article = "https://en.wikipedia.org/wiki/Screen_Actors_Guild_Award_for_{0}".format(category)

        if not isFile(target):
            if debug:
                print("Downloading {0}".format(article))
            getWebData(base=article, savename=target, useSafari=False)
            sleep(1)
Пример #5
0
 def downloadRottenTomatoesTop100Data(self, genre, outdir, debug=False):
     """Fetch the Rotten Tomatoes "top 100" page for one genre.

     The result is written to ``<genre>.p``; if that file is already
     present the call is a no-op. A two-second pause follows every actual
     download.

     NOTE(review): the ``outdir`` argument is not used. The output directory
     is always ``setDir(self.getDataDir())``, created if missing. Confirm
     whether callers rely on this.

     Args:
         genre: Genre slug inserted into ``top_100_<genre>_movies``.
         outdir: Ignored (see note above).
         debug: If true, print the target file name before downloading.
     """
     datadir = setDir(self.getDataDir())
     if not isDir(datadir):
         mkDir(datadir)

     page = "https://www.rottentomatoes.com" + ("/top/bestofrt/top_100_" + genre + "_movies/")
     target = setFile(datadir, genre + ".p")

     if not isFile(target):
         if debug:
             print("Downloading/Saving {0}".format(target))
         getWebData(base=page, savename=target, useSafari=False, dtime=10)
         sleep(2)
Пример #6
0
 def downloadFilms101YearlyData(self, year, outdir, debug=False):
     """Download the films101.com yearly page for ``year`` into ``outdir``.

     The page is stored as ``<year>.p`` (path built by ``setFile``). Nothing
     is downloaded when that file already exists. A failed download is
     tolerated (best effort) and returns immediately; a successful one is
     followed by a two-second pause.

     Args:
         year: Year interpolated into the URL and the file name.
         outdir: Directory handed to ``setFile`` for the output file.
         debug: If true, print the target file name before downloading and
             the error when the download fails.
     """
     url = "http://www.films101.com/y{0}r.htm".format(year)
     savename = setFile(outdir, "{0}.p".format(year))
     if isFile(savename):
         return

     if debug:
         print("Downloading/Saving {0}".format(savename))
     try:
         getWebData(base=url, savename=savename, useSafari=False)
     except Exception as err:
         # Catch Exception rather than everything so that Ctrl-C
         # (KeyboardInterrupt) and SystemExit still stop a download loop.
         if debug:
             print("Could not download {0}: {1}".format(url, err))
         return
     sleep(2)
Пример #7
0
    def downloadTeamStandingsByYear(self, year, debug=False):
        """Fetch the college-football standings page for one season.

        The URL is ``join(self.getBase(), "college-football/standings/_/season",
        str(year))`` and the result is written to ``<year>.p`` in
        ``self.getSeasonDir()``. If that file is already present the call is
        a no-op. Each actual download is followed by a 10-12 second pause
        with random jitter.

        Args:
            year: Season year; used in the URL and the file name.
            debug: If true, print the URL before downloading.
        """
        season = str(year)
        target = join(self.getBase(), "college-football/standings/_/season", season)
        outfile = setFile(self.getSeasonDir(), season + ".p")

        if not isFile(outfile):
            if debug:
                print("Downloading {0}".format(target))
            getWebData(base=target, savename=outfile, useSafari=False)
            pause = 10 + 2 * random()
            sleep(pause)
Пример #8
0
    def downloadTeamStatisticsDataByYear(self, idval, name, year, debug=False):
        """Fetch one team's college-football statistics page for one season.

        The result is written to ``<name>-<year>.p`` in the directory returned
        by ``self.getYearlyStatisticsDir(year)``. If that file is already
        present the call is a no-op. Each actual download is followed by a
        15-17 second pause with random jitter.

        Args:
            idval: Team id inserted into the URL path.
            name: Team name used as the first part of the file name.
            year: Season year; used in the URL and the file name.
            debug: If true, print the URL and target file before downloading.
        """
        stats_path = "college-football/team/stats/_/id/{0}/season".format(idval)
        target = join(self.getBase(), stats_path, str(year))

        outfile = setFile(self.getYearlyStatisticsDir(year),
                          "{0}-{1}.p".format(name, year))

        if not isFile(outfile):
            if debug:
                print("Downloading {0} to {1}".format(target, outfile))
            getWebData(base=target, savename=outfile, useSafari=False)
            pause = 15 + 2 * random()
            sleep(pause)
Пример #9
0
    def downloadBoxOfficeMojoWeekendData(self,
                                         year,
                                         week,
                                         outdir,
                                         debug=False):
        """Fetch the Box Office Mojo weekend chart for one year/week.

        The result is written to ``<year>-<week>.p`` inside ``outdir``, with
        the week prefixed by ``"0"`` when it is below 10. If that file is
        already present the call is a no-op. A two-second pause follows every
        actual download.

        Args:
            year: Year placed in the ``yr`` query parameter.
            week: Week number placed in the ``wknd`` query parameter.
            outdir: Directory handed to ``setFile`` for the output file.
            debug: If true, print the target file name before downloading.
        """
        year_tag = str(year)
        # Values below 10 get a literal "0" prefix, e.g. 7 -> "07".
        week_tag = "0" + str(week) if week < 10 else str(week)

        chart = "".join([
            "http://www.boxofficemojo.com/weekend/chart/?yr=", year_tag,
            "&wknd=", week_tag,
            "&p=.htm",
        ])
        target = setFile(outdir, year_tag + "-" + week_tag + ".p")

        if not isFile(target):
            if debug:
                print("Downloading/Saving {0}".format(target))
            getWebData(base=chart, savename=target, useSafari=False)
            sleep(2)
Пример #10
0
    def downloadGameDataByID(self, gameID, year, test=False, debug=False):
        """Fetch the ESPN play-by-play page for a single game.

        The result is written to ``<gameID>.p`` in the directory returned by
        ``self.getYearlyGamesDir(year)``. An existing file whose size rounds
        to 0 kB is removed first. With ``test`` set, the method only prints
        what it would download and returns.

        NOTE(review): an existing file that is large enough is not a reason to
        return early here; ``getWebData`` is still called. Confirm whether
        ``getWebData`` itself skips existing files.

        Args:
            gameID: Game id used in the URL query string and the file name.
            year: Season year used to select the output directory.
            test: If true, print the URL and target file and do not download.
            debug: Unused; ``removeFile`` and ``getWebData`` are always called
                with ``debug=True``.
        """
        from os.path import getsize

        outfile = setFile(self.getYearlyGamesDir(year), "{0}.p".format(gameID))
        page = "http://www.espn.com/college-football/playbyplay?gameId={0}".format(
            gameID)

        # Drop leftovers whose size in kilobytes rounds down to zero.
        if isFile(outfile) and round(getsize(outfile) / 1e3) < 1:
            removeFile(outfile, debug=True)

        if test:
            print("Downloading {0} to {1}".format(page, outfile))
            return

        options = dict(base=page, savename=outfile, dtime=6,
                       useSafari=True, debug=True)
        getWebData(**options)
        sleep(6)
Пример #11
0
    def downloadWikipediaYearlyData(self, year, outdir, debug=False):
        """Download the Wikipedia "<N>th Academy Awards" article for ``year``.

        The ceremony number is ``int(year) - 1927`` and gets an English
        ordinal suffix (1st, 2nd, 3rd, 4th, ..., 11th, 12th, 13th, 21st, ...).
        The page is stored as ``<year>.p`` (path built by ``setFile``).
        Nothing is downloaded when that file already exists. A one-second
        pause follows every actual download.

        Args:
            year: Year (int or numeric string) mapped to a ceremony number.
            outdir: Directory handed to ``setFile`` for the output file.
            debug: If true, print the URL before downloading.

        Raises:
            ValueError: If ``year`` cannot be converted with ``int()``.
        """
        number = str(int(year) - 1927)

        # Numbers ending in 11, 12 or 13 take "th"; otherwise the last digit
        # decides the suffix.
        if number.endswith(("11", "12", "13")):
            suffix = "th"
        else:
            suffix = {"1": "st", "2": "nd", "3": "rd"}.get(number[-1], "th")

        url = "https://en.wikipedia.org/wiki/" + number + suffix + "_Academy_Awards"

        savename = setFile(outdir, str(year) + ".p")
        if isFile(savename):
            return
        if debug:
            print("Downloading {0}".format(url))
        getWebData(base=url, savename=savename, useSafari=False)
        sleep(1)