Exemplo n.º 1
0
    def saveCorrections(self, debug=True):
        """Merge the pending corrections file into the saved corrections file.

        Reads ``corr.yaml`` and ``saved.yaml`` from ``self.getDataDir()``.
        Movies missing from the saved data are added as-is; for movies that
        are already present the two correction lists are concatenated and
        de-duplicated through a ``set`` (so list order is not preserved).
        The merged mapping is then written back to ``saved.yaml``.

        Args:
            debug: When true, print each newly added movie; the flag is also
                forwarded to ``saveFile``.

        Raises:
            ValueError: If the saved data cannot be read or written.
        """
        corrsavename = setFile(self.getDataDir(), "corr.yaml")
        corrData = getFile(corrsavename)

        try:
            savename = setFile(self.getDataDir(), "saved.yaml")
            savedData = getFile(savename)
        except Exception as err:
            # Keep the original cause attached so the failure can be diagnosed.
            raise ValueError("Could not access saved data!") from err

        if corrData is None:
            print("There is no corrections data.")
            return

        print("Found {0} old corrections".format(len(savedData)))
        print("Found {0} new corrections".format(len(corrData)))
        for movie, corrs in corrData.items():
            previous = savedData.get(movie)
            if previous is None:
                if debug:
                    print("Adding {0}".format(movie))
                savedData[movie] = corrs
                continue

            newSaved = list(set(previous + corrs))
            if len(newSaved) != len(previous):
                print("Adding new corrections to {0}".format(movie))
            savedData[movie] = newSaved

        try:
            savename = setFile(self.getDataDir(), "saved.yaml")
            saveFile(idata=savedData, ifile=savename, debug=debug)
            print("There are {0} total corrections".format(len(savedData)))
        except Exception as err:
            raise ValueError("There was an error saving the saved corrctions yaml file!") from err
Exemplo n.º 2
0
    def createRawOscarData(self, debug=True):
        """Write ``raw.yaml`` from the parsed yearly oscar files plus saved fixes.

        Every ``.json`` file in ``self.wikiData.getResultsDir()`` is loaded and
        keyed by its base filename.  ``Winner`` / ``Nominees`` values found in
        ``saved.yaml`` (corrections directory) then replace the parsed values
        for the same year and title, and the combined data is saved as
        ``raw.yaml`` in the corrections directory.

        Args:
            debug: When true, print how many oscar files were found.
        """
        print("Checking for poorly parsed oscar data.")
        resultFiles = sorted(findExt(self.wikiData.getResultsDir(), ext=".json"))
        if debug:
            print("Found {0} oscar files".format(len(resultFiles)))
        yearlyData = {getBaseFilename(jsonFile): getFile(jsonFile)
                      for jsonFile in resultFiles}

        correctionsFile = setFile(self.getCorrectionsDir(), "saved.yaml")
        savedData = getFile(correctionsFile) if isFile(correctionsFile) else {}

        for year, savedTitles in savedData.items():
            for title, savedEntry in savedTitles.items():
                winner = savedEntry.get("Winner")
                nominees = savedEntry.get("Nominees")
                if winner is not None:
                    print("Overwritting {0} {1} winner".format(year, title))
                    yearlyData[year][title]["Winner"] = winner
                if nominees is not None:
                    print("Overwritting {0} {1} nominees".format(year, title))
                    yearlyData[year][title]["Nominees"] = nominees

        rawFile = setFile(self.getCorrectionsDir(), "raw.yaml")
        saveFile(idata=yearlyData, ifile=rawFile)
Exemplo n.º 3
0
    def mergeArtistAlbumIDMap(self):
        """Fold the pre-merge artist album maps into their merged artist IDs.

        For each of ``IDToAlbumNames`` and ``IDToAlbumRefs`` the
        ``Artist<name>PreMerge.p`` file is loaded and converted to a dict.
        For every entry of the merger data a fresh record is created under
        the merged ``ID``; the per-media dicts of each source ID listed in
        ``MergeData`` are combined into it, and the source IDs are then
        removed.  The result is saved as ``Artist<name>.p``.

        ``mergerData`` is presumably a pandas Series of dicts carrying
        ``ID`` and ``MergeData`` keys -- confirm against
        ``getMergerDataByDB``.
        """
        print("="*50)
        print("")
        ts = timestat("Merging ArtistAlbumID DBs for ==> {0} <==".format(self.db))
        print("")
        print("="*50)

        mergerData = self.mam.getMergerDataByDB(self.db)

        # These totals depend only on the merger data, so compute them once
        # rather than on every pass of the loop below.
        fromIDs = mergerData.apply(lambda x: len(x["MergeData"])).sum()
        toIDs = len(mergerData)

        for basename in ("IDToAlbumNames", "IDToAlbumRefs"):
            savename = setFile(self.disc.getDiscogDBDir(), "Artist{0}PreMerge.p".format(basename))
            savedata = getFile(savename).to_dict()
            print("Found {0} entries.".format(len(savedata)))

            print("")
            print("================================================")
            print("  Merger From [{0}] DB IDs To [{1}] New IDs".format(fromIDs, toIDs))
            print("    Pre Merge [{0}]".format(len(savedata)))

            # ``iteritems`` no longer exists in pandas 2.x; ``items`` yields
            # the same (label, value) pairs.
            for artistName, artistData in mergerData.items():
                newID = artistData["ID"]
                dbIDs = artistData["MergeData"].keys()

                savedata[newID] = {}
                for artistID in dbIDs:
                    if savedata.get(artistID) is not None:
                        for mediaName, mediaData in savedata[artistID].items():
                            if savedata[newID].get(mediaName) is not None:
                                savedata[newID][mediaName].update(mediaData)
                            else:
                                savedata[newID][mediaName] = mediaData

                for artistID in dbIDs:
                    try:
                        del savedata[artistID]
                    except KeyError:
                        print("Could not delete merged ID {0}".format(artistID))

            print("   Post Merge [{0}]".format(len(savedata)))
            print("================================================")
            print("")

            savename = setFile(self.disc.getDiscogDBDir(), "Artist{0}.p".format(basename))
            print("Saving {0} entries to {1}\n".format(len(savedata), savename))
            saveFile(ifile=savename, idata=Series(savedata), debug=True)
            sleep(0.5)

        ts.stop()
Exemplo n.º 4
0
def main(args):
    """Group the music files under the current directory by album and disc.

    Every path returned by ``findPattern("./", pattern=".")`` is opened with
    ``mp3ID``; files that cannot be read are reported and skipped.  Files are
    grouped by the first album value and by the first disc-number value
    (``None`` when the tag is missing).

    Args:
        args: Parsed options.  ``args.album is True`` moves each file into a
            directory named after its album; ``args.disc is True`` moves each
            file into ``Disc <n>``.  Both directories are created under the
            current working directory.
    """
    cwd = getcwd()

    albumSegments = {}
    discSegments = {}

    for ifile in findPattern("./", pattern="."):
        # Only open the file inside the guard: an unreadable file must be
        # skipped, not abort the whole run.
        try:
            mid = mp3ID(ifile)
        except Exception:
            print("Error reading file {0}".format(ifile))
            continue

        album = mid.getAlbum()
        print("Album: {0}".format(album))
        if album is not None:
            album = album[0]
        albumSegments.setdefault(album, []).append(ifile)

        disc = mid.getDiscNumber()
        if disc is not None:
            disc = disc[0]
        discSegments.setdefault(disc, []).append(ifile)

    if args.album is True:
        print("Album Segments: {0}".format(albumSegments.keys()))
        for album, albumFiles in albumSegments.items():
            albumDir = setDir(cwd, album)
            mkDir(albumDir)
            for ifile in albumFiles:
                src = ifile
                dst = setFile(albumDir, getBasename(ifile))
                print("Moving [{0}] to [{1}]".format(src, dst))
                moveFile(src, dst, debug=True)

    if args.disc is True:
        print("Disc Segments: {0}".format(discSegments.keys()))
        for disc, discFiles in discSegments.items():
            discDir = setDir(cwd, "Disc {0}".format(disc))
            mkDir(discDir)
            for ifile in discFiles:
                src = ifile
                dst = setFile(discDir, getBasename(ifile))
                moveFile(src, dst, debug=True)
Exemplo n.º 5
0
    def processWikiFilmYearlyData(self, procYear=None, debug=False):
        """Parse the downloaded yearly files and save the sorted results.

        Each ``.p`` file in ``self.getDataDir()`` is parsed with
        ``parseWikiFilmYearlyData``; the resulting items are sorted by value
        in ascending order and stored under the file's base name.  The first
        five entries per year are printed, and everything is written to
        ``<self.name>.json`` in the results directory.

        Args:
            procYear: If given, only files whose name matches ``str(procYear)``
                are processed; otherwise all ``.p`` files are.
            debug: When true, print each file name before it is parsed.
        """
        outdir = self.getDataDir()
        if procYear is None:
            files = findExt(outdir, ext=".p")
        else:
            files = findPatternExt(outdir, pattern=str(procYear), ext=".p")

        yearlyData = {}
        for ifile in sorted(files):
            if debug:
                print("Processing {0}".format(ifile))
            year = getBaseFilename(ifile)
            parsed = self.parseWikiFilmYearlyData(ifile, debug=False)

            yearlyData[year] = sorted(parsed.items(),
                                      key=operator.itemgetter(1),
                                      reverse=False)
            print("---->", year,
                  " (Top 5/{0} Movies) <----".format(len(yearlyData[year])))
            for item in yearlyData[year][:5]:
                print(item)
            print('\n')

        savename = setFile(self.getResultsDir(), "{0}.json".format(self.name))
        print("Saving {0} WikiFilm data to {1}".format(len(yearlyData),
                                                       savename))
        saveFile(savename, yearlyData)
Exemplo n.º 6
0
    def createArtistMetadataMap(self):
        """Collect per-artist genre, style and collaboration maps and save them.

        Reads every ``*-ArtistMetadata*.p`` file found in the albums metadata
        DB directory, copies the ``Genre``, ``Artists`` and ``Style`` value of
        each artist into three ID-keyed dicts, and saves each dict as a
        ``Series`` named ``ArtistIDToGenre.p``, ``ArtistIDToStyle.p`` and
        ``ArtistIDToCollaborations.p`` in the discog DB directory.
        """
        ts = timestat("Creating Artist DBs")

        genreByID = {}
        styleByID = {}
        collaborationsByID = {}

        metadataDir = self.disc.getAlbumsMetadataDBDir()
        for ifile in findPatternExt(metadataDir, pattern="-ArtistMetadata", ext='.p'):
            print(ifile,'\t',end="")
            for artistID, artistData in getFile(ifile).items():
                genreByID[artistID] = artistData['Genre']
                collaborationsByID[artistID] = artistData['Artists']
                styleByID[artistID] = artistData['Style']
            # Running total of artists seen so far.
            print(len(genreByID))
        print("\n\n==============================================\n")

        outputs = (
            ("IDToGenre", genreByID),
            ("IDToStyle", styleByID),
            ("IDToCollaborations", collaborationsByID),
        )
        for basename, savedata in outputs:
            savename = setFile(self.disc.getDiscogDBDir(), "Artist{0}.p".format(basename))
            print("Saving {0} entries to {1}\n".format(len(savedata), savename))
            saveFile(ifile=savename, idata=Series(savedata), debug=True)

        ts.stop()
Exemplo n.º 7
0
    def parseRottenTomatoes(self, debug=False):
        """Combine all parsed Rotten Tomatoes files into one yearly ranking.

        Each ``.p`` file in the data directory is parsed with
        ``parseRottenTomatoesFile``.  Results for the same year are merged,
        with later files overriding earlier ones on duplicate keys.  Each
        year's items are then sorted by value in descending order, the top
        five are printed, and the result is saved to ``rottentomatoes.json``.

        Args:
            debug: Forwarded to ``parseRottenTomatoesFile``.
        """
        movies = {}
        for ifile in findExt(self.getDataDir(), ext=".p"):
            parsed = self.parseRottenTomatoesFile(ifile, debug=debug)
            for year, yearlyResult in parsed.items():
                known = movies.get(year)
                if known is None:
                    movies[year] = yearlyResult
                else:
                    movies[year] = {**known, **yearlyResult}

        yearlyData = {}
        for year, ratings in movies.items():
            ranked = sorted(ratings.items(),
                            key=operator.itemgetter(1),
                            reverse=True)
            yearlyData[year] = ranked
            print("---->", year,
                  " (Top 5/{0} Movies) <----".format(len(ranked)))
            for item in ranked[:5]:
                print(item)
            print('\n')

        outfile = setFile(self.getResultsDir(), "rottentomatoes.json")
        print("Saving", len(yearlyData), "yearly results to", outfile)
        saveFile(outfile, yearlyData)
Exemplo n.º 8
0
 def __init__(self, debug=False):
     """Create the DatPiff helpers and check the required directories exist.

     Args:
         debug: Stored on the instance and forwarded to the base class.

     Raises:
         ValueError: If the artists directory, its ``known`` sub-directory,
             or the ``datPiffKnown.p`` file inside it is missing.
     """
     self.db = "DatPiff"
     disc = dbBase(self.db.lower())
     self.disc = disc
     self.artist = artistDP(disc)
     utils = datpiffUtils()
     self.dutils = utils
     utils.setDiscogs(disc)
     self.debug = debug

     ## MultiArtist
     self.mulArts = multiartist()

     print("DatPiff ArtistsDir: {0}".format(disc.getArtistsDir()))
     if not isDir(disc.getArtistsDir()):
         raise ValueError("Could not find artist dir for DatPiff")

     knownDir = setDir(disc.getArtistsDir(), "known")
     self.knownDir = knownDir
     if not isDir(knownDir):
         print("Make sure that Piggy is loaded!!!")
         raise ValueError("Could not find known [{0}] dir for DatPiff".format(knownDir))

     knownFile = setFile(knownDir, "datPiffKnown.p")
     self.knownFile = knownFile
     if not isFile(knownFile):
         raise ValueError("Known File [{0}] does not exist".format(knownFile))

     self.baseURL = "https://www.datpiff.com/"
     self.searchURL = "https://www.datpiff.com/mixtapes-search?"

     super().__init__(self.db, disc, self.artist, utils, debug=debug)
Exemplo n.º 9
0
    def processAACTACategoryData(self, debug=False):
        """Collect the movies of every AACTA category file, grouped by year.

        Each data file is parsed with ``parseAACTACategoryData`` (the file's
        base name is passed as the category).  All movies of all categories
        are pooled per year, de-duplicated through a ``set`` and stored as
        ``[movie, 10]`` pairs, then saved to ``<self.name>.json`` in the
        results directory.

        Args:
            debug: When true, print each file before parsing; also forwarded
                to the parser.

        Raises:
            ValueError: If a file yields no results.
        """
        from collections import OrderedDict

        # NOTE(review): sibling methods pass ext=".p"; confirm that findExt
        # accepts the "*.p" form used here.
        dataFiles = findExt(self.getDataDir(), ext="*.p")

        movies = OrderedDict()
        print(dataFiles)
        for dataFile in dataFiles:
            if debug:
                print("Processing {0}".format(dataFile))
            categoryName = getBaseFilename(dataFile)
            parsed = self.parseAACTACategoryData(dataFile, categoryName, debug=debug)

            if len(parsed) == 0:
                raise ValueError("No results for {0}".format(dataFile))

            for year, yearData in parsed.items():
                for categoryData in yearData.values():
                    movies.setdefault(year, []).extend(categoryData)

        # Replace each year's list with unique [movie, score] pairs.
        for year in movies.keys():
            movies[year] = [[title, 10] for title in set(movies[year])]

        outfile = setFile(self.getResultsDir(), "{0}.json".format(self.name))
        print("Saving {0} Years of AACTA Data to {1}".format(
            len(movies), outfile))
        saveFile(outfile, movies)
Exemplo n.º 10
0
    def rmIDFromDB(self, artistID, modValue=None):
        """Remove one or more artist IDs from a mod-value DB file.

        Loads ``<modValue>-DB.p`` from the artists DB directory, deletes each
        requested ID from it, calls ``self.rmIDFiles`` for every ID (whether
        or not it was present in the DB), and re-saves the DB only if at
        least one entry was deleted.

        Args:
            artistID: A single ID string or a list of IDs.
            modValue: DB bucket to edit.  When ``None`` it is derived from
                ``artistID`` via ``getDiscIDHashMod``.
                NOTE(review): that lookup receives ``artistID`` before it is
                normalized to a list -- confirm callers pass ``modValue``
                when supplying a list.

        Raises:
            ValueError: If ``artistID`` is neither a string nor a list.
        """
        print("Trying to remove data from ArtistID {0}".format(artistID))
        if modValue is None:
            modValue = self.dutils.getDiscIDHashMod(discID=artistID, modval=self.disc.getMaxModVal())
        artistDBDir = self.disc.getArtistsDBDir()
        dbname = setFile(artistDBDir, "{0}-DB.p".format(modValue))
        print("Loading {0}".format(dbname))
        dbdata = getFile(dbname)

        if isinstance(artistID, str):
            artistID = [artistID]
        elif not isinstance(artistID, list):
            raise ValueError("Not sure what to do with {0}".format(artistID))

        saveVal = False
        for ID in artistID:
            try:
                del dbdata[ID]
            except (KeyError, TypeError):
                # Missing ID, or a DB object that does not support deletion:
                # report it and still clean up the ID's files below.
                print("Not there...")
            else:
                print("Deleted {0}".format(ID))
                saveVal = True

            self.rmIDFiles(ID)

        if saveVal:
            print("Saving {0}".format(dbname))
            saveFile(idata=dbdata, ifile=dbname)
        else:
            print("No reason to save {0}".format(dbname))
Exemplo n.º 11
0
    def downloadUltimateMovieRankingsYearlyData(self,
                                                year,
                                                outdir,
                                                debug=False):
        """Download one year's Ultimate Movie Rankings page unless it exists.

        The page is requested from the ``best-worst-movies-<year>`` URL and
        stored as ``<year>.p`` in ``outdir``.  Nothing is done when that file
        is already present.  A failed download is best-effort: it is not
        raised, only reported when ``debug`` is set.

        Args:
            year: Year to download; used in both the URL and the file name.
            outdir: Directory that receives ``<year>.p``.
            debug: When true, print progress and download failures.
        """
        url = "https://www.ultimatemovierankings.com/best-worst-movies-{0}/".format(
            year)

        savename = setFile(outdir, str(year) + ".p")
        if isFile(savename):
            return
        if debug:
            print("Downloading/Saving {0}".format(savename))
        try:
            getWebData(base=url, savename=savename, useSafari=False)
            sleep(2)
        except Exception as err:
            # Deliberately non-fatal so one bad year does not stop a batch.
            if debug:
                print("Could not download {0}: {1}".format(url, err))
            sleep(0.2)
Exemplo n.º 12
0
    def getArtistModValFiles(self, modVal, previousDays=5, force=False):
        """Return the artist files of one mod-value bucket that need parsing.

        All ``.p`` files in ``<artistsDir>/<modVal>`` are returned when the
        bucket's ``<modVal>-DB.p`` does not exist or ``force`` is ``True``.
        Otherwise only files modified fewer than ``previousDays`` days ago,
        or modified after the DB file was last written, are returned.

        Args:
            modVal: Bucket number; names both the sub-directory and DB file.
            previousDays: Age threshold in whole days for "new" files.
            force: When ``True``, ignore the DB timestamp and return all files.

        Returns:
            list: The selected file paths.  In the filtered case the list is
            built from a set, so its order is unspecified.
        """
        artistDir = self.disc.getArtistsDir()
        artistDBDir = self.disc.getArtistsDBDir()

        dirVal = setDir(artistDir, str(modVal))
        files = findExt(dirVal, ext='.p')
        dbname = setFile(artistDBDir, "{0}-DB.p".format(modVal))

        now = datetime.now()
        lastModified = None
        if isFile(dbname) and force is not True:
            lastModified = datetime.fromtimestamp(path.getmtime(dbname))

        if lastModified is None:
            newFiles = files
            print("  ===> Parsing all {0} files for modval {1}".format(len(newFiles), modVal))
            return newFiles

        selected = set()
        for ifile in files:
            # Stat each file once and reuse the timestamp for both tests.
            modified = datetime.fromtimestamp(path.getmtime(ifile))
            if (now - modified).days < previousDays or modified > lastModified:
                selected.add(ifile)
        newFiles = list(selected)
        print("  ===> Found new {0} files (< {1} days) to parse for modval {2}".format(len(newFiles), previousDays, modVal))
        return newFiles
Exemplo n.º 13
0
    def parseArtistFiles(self, force=False, debug=False):
        """Build and save the per-mod-value artist DBs from search results.

        Every downloaded result file (except ``datPiffKnown.p``) is read.
        Each result's artist string is split into individual names with
        ``self.mulArts.getArtistNames``, and the album is appended to the
        ``Media`` list of every one of those artists, grouped by the artist's
        mod value.  Each group is then parsed with ``self.artist``, saved as
        ``<modVal>-DB.p``, and passed to the two metadata builders.

        Args:
            force: Not used by this method.
            debug: When true, print progress per file and per result; also
                forwarded to the metadata builders.
        """
        from glob import glob

        artistDBData = {}

        # NOTE(review): the input location is a hard-coded absolute path
        # rather than being derived from self.disc / self.knownDir -- confirm
        # this is intended before running on another machine.
        files = glob("/Volumes/Biggy/Discog/artists-datpiff/*/*.p")
        print("Found {0} downloaded search terms".format(len(files)))
        for i, ifile in enumerate(files):
            if ifile.endswith("datPiffKnown.p"):
                continue
            fileresults = getFile(ifile)
            if debug:
                print(i,'/',len(files),'\t',ifile)
            for j, fileresult in enumerate(fileresults):
                if debug:
                    print("  ",j,'/',len(fileresults))
                mixArtists = fileresult["ArtistName"]
                albumName = fileresult["AlbumName"]
                albumURL = fileresult["AlbumURL"]

                mixArtistNames = self.mulArts.getArtistNames(mixArtists)
                mixArtistNames = [x.title() for x in mixArtistNames.keys()]

                for artistName in mixArtistNames:
                    artistID = str(self.dutils.getArtistID(artistName))
                    # The album code is produced by the same ID helper,
                    # applied to the album name.
                    albumID = str(self.dutils.getArtistID(albumName))
                    modval = self.dutils.getArtistModVal(artistID)

                    modvalArtists = artistDBData.setdefault(modval, {})
                    artistEntry = modvalArtists.setdefault(
                        artistName,
                        {"Name": artistName, "ID": artistID, "URL": None, "Profile": None, "Media": []})
                    albumData = {"Artists": mixArtistNames, "Name": albumName, "URL": albumURL, "Code": albumID}
                    artistEntry["Media"].append(albumData)

        artistDBDir = self.disc.getArtistsDBDir()
        totalSaves = 0
        for modVal, modvaldata in artistDBData.items():
            dbData = {}
            for artistName, artistData in modvaldata.items():
                self.artist.setData(artistData)
                artistVal = self.artist.parse()
                dbData[artistVal.ID.ID] = artistVal

            savename = setFile(artistDBDir, "{0}-DB.p".format(modVal))
            print("Saving {0} artist IDs to {1}".format(len(dbData), savename))
            totalSaves += len(dbData)
            saveFile(idata=dbData, ifile=savename)

            self.createArtistModValMetadata(modVal=modVal, db=dbData, debug=debug)
            self.createArtistAlbumModValMetadata(modVal=modVal, db=dbData, debug=debug)

        print("Saved {0} new artist IDs".format(totalSaves))
Exemplo n.º 14
0
    def processFlopsData(self, debug=False):
        """Extract (year, movie) pairs from the downloaded flops tables.

        For every ``.html`` file in the data directory, each ``wikitable`` is
        scanned from its third row on: the movie title is read from the
        row's first ``th`` cell and the year from its first ``td`` cell.
        Movies are grouped by year, stored as ``[movie, 10]`` pairs in
        ascending year order, and saved to ``<self.name>.json``.

        Args:
            debug: Not used by this method.

        Raises:
            ValueError: If a table's header row, a row's movie cell, or a
                row's year cell cannot be read.  The underlying error is
                attached as the cause.
        """
        from collections import OrderedDict

        outdir = self.getDataDir()
        files = findExt(outdir, ext=".html")

        yearlyData = {}
        for ifile in files:
            htmldata = getFile(ifile)
            bsdata = getHTML(htmldata)

            for table in bsdata.findAll("table", {"class": "wikitable"}):
                trs = table.findAll("tr")

                try:
                    headers = [x.text.replace("\n", "") for x in trs[0].findAll("th")]
                except Exception as err:
                    raise ValueError("Could not get headers") from err

                print(headers)

                # Data rows start at the third row of each table.
                for tr in trs[2:]:
                    ths = tr.findAll("th")
                    try:
                        movie = ths[0].text
                        movie = movie.replace("\n", "").strip()
                        movie = movie.replace("[nb 2]", "")
                    except Exception as err:
                        raise ValueError(
                            "Could not find movie in {0}".format(ths)) from err

                    tds = tr.findAll("td")
                    try:
                        year = int(tds[0].text)
                    except Exception as err:
                        raise ValueError(
                            "Could not find year in {0}".format(tds)) from err

                    print(year, '\t', movie)
                    yearlyData.setdefault(year, []).append(movie)

        movies = OrderedDict()
        for year in sorted(yearlyData.keys()):
            movies[year] = [[movie, 10] for movie in yearlyData[year]]

        savename = setFile(self.getResultsDir(), "{0}.json".format(self.name))
        print("Saving {0} Years of flops Data to {1}".format(
            len(movies), savename))
        saveFile(savename, movies)
Exemplo n.º 15
0
 def downloadRottenTomatoesYearlyData(self, year, outdir, debug=False):
     """Download one year's Rotten Tomatoes "best of" page unless it exists.

     The page is saved as ``<year>.p`` in ``outdir``; nothing happens when
     that file is already present.

     Args:
         year: Year to request; appended to the URL as ``?year=<year>``.
         outdir: Directory that receives the downloaded file.
         debug: When true, print the target file name before downloading.
     """
     address = "https://www.rottentomatoes.com/top/bestofrt/?year=" + str(year)
     target = setFile(outdir, "{0}.p".format(year))
     if not isFile(target):
         if debug:
             print("Downloading/Saving {0}".format(target))
         getWebData(base=address, savename=target, useSafari=False)
Exemplo n.º 16
0
 def getMyMovies(self, debug=False):
     """Load and return the contents of ``mymovies.json`` from the data dir.

     Args:
         debug: When true, print how many entries were loaded.

     Raises:
         ValueError: If ``mymovies.json`` does not exist.
     """
     source = setFile(self.getDataDir(), "mymovies.json")
     if isFile(source):
         loaded = getFile(source)
         if debug:
             print("Found {0} my movies".format(len(loaded)))
         return loaded
     raise ValueError("Cannot access {0}".format(source))
Exemplo n.º 17
0
 def getCombinedMovies(self, debug=False):
     """Load and return ``movies.json`` from the combine results directory.

     Args:
         debug: When true, print how many entries were loaded.

     Raises:
         ValueError: If ``movies.json`` does not exist.
     """
     savename = setFile(self.combine.getResultsDir(), "movies.json")
     if not isFile(savename):
         # Was misspelled ``ValueErrro``, which raised NameError instead.
         raise ValueError("Cannot access {0}".format(savename))
     combinedMovies = getFile(savename)
     if debug:
         print("Found {0} combined movies".format(len(combinedMovies)))
     return combinedMovies
Exemplo n.º 18
0
    def __init__(self, path, chart, debug=False):
        """Load the chart data files for one chart, or for all known charts.

        Args:
            path: Directory holding the ``current<chart>...Data.p`` files.
            chart: Chart name whose files are loaded (lower-cased for the
                file name).  When ``None`` the files of every built-in chart
                are loaded and merged with ``dict.update``, so later charts
                override earlier ones on duplicate keys.
            debug: Stored on the instance.
        """
        self.debug = debug
        self.chart = chart
        self.path = path

        def load(template, chartName):
            # Read the file that ``template`` names for ``chartName`` in ``path``.
            return getFile(setFile(path, template.format(chartName.lower())))

        if chart is not None:
            self.fullChartData = load("current{0}FullChartArtistAlbumData.p", chart)
            print("There are {0} artists in the full chart data".format(
                len(self.fullChartData)))
            self.artistAlbumData = load("current{0}ArtistAlbumData.p", chart)
            print("There are {0} artists in the artist album data".format(
                len(self.artistAlbumData)))
        else:
            mergedFull = {}
            mergedAlbums = {}
            chartNames = (
                "MusicVF", "Billboard", "BillboardYE", "RateYourMusic",
                "RateYourMusicSong", "RateYourMusicList",
                "RateYourMusicList2",
            )
            for chartName in chartNames:
                print(chartName)
                mergedFull.update(
                    load("current{0}FullChartArtistAlbumData.p", chartName))
                print("There are {0} artists in the full chart data".format(
                    len(mergedFull)))
                mergedAlbums.update(
                    load("current{0}ArtistAlbumData.p", chartName))
                print("There are {0} artists in the artist album data".format(
                    len(mergedAlbums)))
            self.fullChartData = mergedFull
            self.artistAlbumData = mergedAlbums

        self.artistData = {}

        self.artistKeyToNameMap = {}
Exemplo n.º 19
0
 def parseArtistMetadataFiles(self, debug=False):
     """Rebuild both metadata sets from every saved mod-value DB file.

     For each mod value in ``range(self.disc.getMaxModVal())`` the file
     ``<modVal>-DB.p`` is loaded from the artists DB directory and passed
     to ``createArtistModValMetadata`` and
     ``createArtistAlbumModValMetadata``.

     Args:
         debug: Forwarded to both metadata builders.
     """
     dbDir = self.disc.getArtistsDBDir()
     bucketCount = self.disc.getMaxModVal()
     for modVal in range(bucketCount):
         contents = getFile(setFile(dbDir, "{0}-DB.p".format(modVal)))
         self.createArtistModValMetadata(modVal=modVal, db=contents, debug=debug)
         self.createArtistAlbumModValMetadata(modVal=modVal, db=contents, debug=debug)
Exemplo n.º 20
0
 def downloadWikiFilmYearlyData(self, year, outdir, debug=False):
     """Download the Wikipedia "<year> in film" page unless already saved.

     The page is stored as ``<year>.p`` in ``outdir``.  After a download the
     method sleeps for one second.

     Args:
         year: Year whose page is requested.
         outdir: Directory that receives the downloaded file.
         debug: When true, print the URL before downloading.
     """
     address = "https://en.wikipedia.org/wiki/{0}_in_film".format(year)
     target = setFile(outdir, str(year) + ".p")
     if not isFile(target):
         if debug:
             print("Downloading {0}".format(address))
         getWebData(base=address, savename=target, useSafari=False)
         sleep(1)
Exemplo n.º 21
0
    def assertDBModValExtraData(self, modVal, minPages=1, maxPages=None, allowMulti=False, test=True, clean=True):
        """Walk the extra result pages of every multi-page artist in one DB.

        Loads ``<modVal>-DB.p`` from the artists DB directory.  For each
        artist whose ``pages.more`` is ``True``, pages ``1..npages`` are
        visited; page 1 uses the plain artist URL/savename and later pages
        pass the page number to ``getArtistURL`` / ``getArtistSavename``.

        Args:
            modVal: Mod value naming the DB file to read.
            minPages: Artists with fewer pages than this are skipped.
            maxPages: Optional upper bound on the number of pages visited.
            allowMulti: Not used by this method.
            test: When ``True``, only print what would be downloaded.
            clean: When ``True``, remove an existing saved file for each page.

        NOTE(review): the ``clean`` removal runs before the ``test`` check,
        so with the defaults (``test=True, clean=True``) existing files are
        removed and nothing is downloaded -- confirm this is intended.
        """
        print("assertDBModValExtraData(",modVal,")")
        artistDBDir = self.disc.getArtistsDBDir()
        dbname  = setFile(artistDBDir, "{0}-DB.p".format(modVal))     
        dbdata  = getFile(dbname)
        # Never updated or read below.
        nerrs   = 0
        #ignores = self.artistIgnoreList()

        
        for artistID,artistData in dbdata.items():
            # Tracks whether the per-artist header line has been printed yet.
            first = True
            pages = artistData.pages
            if pages.more is True:
                npages = pages.pages
                if npages < minPages:
                    continue
                if maxPages is not None:
                    npages = min([npages, maxPages])
                artistRef = artistData.url.url
                #if artistData.artist.name in ignores:
                #    print("\tNot downloading artist in ignore list: {0}".format(artistData.artist.name))
                #    continue
                    
                #savename = self.dutils.getArtistSavename(artistID)
                #removeFile(savename)
                #print("\t---> {0} / {1}   {2}".format(1, pages.pages, savename))

                #print(artistID,'\t',npages,'\t')
                #continue
                    
                    
                for p in range(1, npages+1):
                    # Page 1 is addressed without an explicit page number.
                    if p == 1:
                        url      = self.getArtistURL(artistRef)
                        savename = self.dutils.getArtistSavename(artistID)
                    else:
                        url      = self.getArtistURL(artistRef, p)
                        savename = self.dutils.getArtistSavename(artistID, p)
                    print("\t---> {0} / {1}   {2}".format(p, pages.pages, url))
                    
                    # Removal happens regardless of the ``test`` flag below.
                    if clean is True:
                        if isFile(savename):
                            print("Removing {0}".format(savename))
                            removeFile(savename)
                        
                    # Dry run: report the URL and move on to the next page.
                    if test is True:
                        print("\t\tWill download: {0}".format(url))
                        print("\t\tJust testing... Will not download anything.")
                        continue
                        
                    # Only fetch pages that are not already on disk.
                    if not isFile(savename):
                        if first:
                            print("{0: <20}{1: <10}{2}".format(artistID,pages.tot,artistData.artist.name))
                            first = False

                        print("{0: <20}{1: <10}{2}".format(artistID, "{0}/{1}".format(p,pages.pages), url))
                        self.dutils.downloadArtistURL(url=url, savename=savename, force=True)
                        # Pause between downloads.
                        sleep(3)
Exemplo n.º 22
0
 def getArtistSavename(self, discID):
     """Return the save path for an artist ID, or ``None`` without a mod value.

     The file is ``<discID>.p`` inside the sub-directory of the artists
     directory named after the ID's hash mod value (obtained through
     ``mkSubDir``).

     Args:
         discID: Artist ID string; also used as the file's base name.
     """
     artistsRoot = self.disc.getArtistsDir()
     bucket = self.discogsUtils.getDiscIDHashMod(
         discID=discID, modval=self.disc.getMaxModVal())
     if bucket is None:
         return None
     return setFile(mkSubDir(artistsRoot, str(bucket)), discID + ".p")
Exemplo n.º 23
0
Arquivo: SAG.py Projeto: tgadf/movies
    def downloadSAGCategoryData(self, category, outdir, debug=False):
        """Download the Wikipedia page of one SAG award category if missing.

        The page is stored as ``<category>.p`` in ``outdir``.  After a
        download the method sleeps for one second.

        Args:
            category: Category name as it appears at the end of the page URL.
            outdir: Directory that receives the downloaded file.
            debug: When true, print the URL before downloading.
        """
        address = "https://en.wikipedia.org/wiki/Screen_Actors_Guild_Award_for_{0}".format(category)
        target = setFile(outdir, category+".p")
        if not isFile(target):
            if debug:
                print("Downloading {0}".format(address))
            getWebData(base=address, savename=target, useSafari=False)
            sleep(1)
Exemplo n.º 24
0
 def getArtistSavename(self, discID, page=1, credit=False, unofficial=False):
     """Return the save path for an artist page, or ``None`` without a mod value.

     The base directory is the artists directory's sub-directory named after
     the ID's hash mod value.  Within it:

     * an integer ``page`` above 1 maps to ``extra/<discID>-<page>.p``;
     * otherwise ``credit is True`` maps to ``credit/<discID>.p``;
     * otherwise ``unofficial is True`` maps to ``unofficial/<discID>.p``;
     * otherwise the file is ``<discID>.p`` in the base directory.

     Args:
         discID: Artist ID string; also used as the file's base name.
         page: Result page number.
         credit: Select the ``credit`` sub-directory.
         unofficial: Select the ``unofficial`` sub-directory.
     """
     artistsRoot = self.disc.getArtistsDir()
     bucket = self.dutils.getDiscIDHashMod(discID=discID, modval=self.disc.getMaxModVal())
     if bucket is None:
         return None

     baseDir = mkSubDir(artistsRoot, str(bucket))
     if isinstance(page, int) and page > 1:
         return setFile(mkSubDir(baseDir, "extra"), discID+"-{0}.p".format(page))

     if credit is True:
         baseDir = mkSubDir(baseDir, "credit")
     elif unofficial is True:
         baseDir = mkSubDir(baseDir, "unofficial")
     return setFile(baseDir, discID+".p")
Exemplo n.º 25
0
    def parseUltimateMovieRankingsYearlyData(self, procYear=None, debug=False):
        """Parse the downloaded ranking pages into sorted per-year results.

        In every table of every ``.p`` page, rows with exactly 11 ``td``
        cells are treated as movie rows: the title comes from the second
        cell (after ``removeTag(..., 'span')``, with a trailing
        `` (<year>)`` removed) and the rank from the last cell, falling back
        to the second-to-last cell when the last one is not a number.  Each
        year's movies are sorted by rank in descending order, the top five
        are printed, and everything is saved to ``<self.name>.json``.

        Args:
            procYear: If given, only files matching ``str(procYear)`` are
                parsed; otherwise all ``.p`` files are.
            debug: Not used by this method.

        Raises:
            ValueError: If neither of a row's last two cells holds a number.
        """
        outdir = self.getDataDir()
        if procYear is None:
            files = findExt(outdir, ext=".p")
        else:
            files = findPatternExt(outdir, pattern=str(procYear), ext=".p")

        movieData = {}
        for ifile in sorted(files):
            htmldata = getFile(ifile)
            bsdata = getHTML(htmldata)
            year = getBaseFilename(ifile)

            movies = {}
            for table in bsdata.findAll("table"):
                for tr in table.findAll("tr"):
                    tds = tr.findAll("td")
                    if len(tds) != 11:
                        continue

                    film = removeTag(tds[1], 'span').text
                    film = film.replace(" ({0})".format(year), "")
                    try:
                        rank = float(tds[-1].text)
                    except (TypeError, ValueError):
                        try:
                            rank = float(tds[-2].text)
                        except (TypeError, ValueError) as err:
                            raise ValueError(tds[-1], tds[-2], tr) from err

                    movies[film] = rank

            movieData[year] = movies

        yearlyData = {}
        for year in sorted(movieData.keys()):
            yearlyData[year] = sorted(movieData[year].items(),
                                      key=operator.itemgetter(1),
                                      reverse=True)
            print("---->", year,
                  " (Top 5/{0} Movies) <----".format(len(yearlyData[year])))
            for item in yearlyData[year][:5]:
                print(item)
            print('\n')

        savename = setFile(self.getResultsDir(), "{0}.json".format(self.name))
        print("Saving {0} Years of Ultimate Movie Rankings data to {1}".format(
            len(yearlyData), savename))
        saveFile(savename, yearlyData)
Exemplo n.º 26
0
    def getFilename(self, fast, local):
        """Record the multi-artist file names on the instance and return one.

        Four attributes are (re)assigned on every call: ``localpfname`` and
        ``localyfname`` (the bare ``.p`` / ``.yaml`` file names) and
        ``pfname`` / ``yfname`` (the results of ``setFile`` on
        ``self.multiArtistDir`` and the matching bare name).

        Args:
            fast: Exactly ``True`` selects the ``.p`` variant; any other
                value selects the ``.yaml`` variant.
            local: Exactly ``True`` returns the bare file name; any other
                value returns the ``setFile`` result.

        Returns:
            The selected file name.
        """
        stem = "ManualMultiArtists"

        self.localpfname = "{0}.p".format(stem)
        self.localyfname = "{0}.yaml".format(stem)
        self.pfname = setFile(self.multiArtistDir, self.localpfname)
        self.yfname = setFile(self.multiArtistDir, self.localyfname)

        # Identity tests on purpose: truthy values other than True do not
        # count as "on", exactly as before.
        if fast is True:
            bareName, fullName = self.localpfname, self.pfname
        else:
            bareName, fullName = self.localyfname, self.yfname

        return bareName if local is True else fullName
Exemplo n.º 27
0
 def downloadRottenTomatoesTop100Data(self, genre, outdir, debug=False):
     """Download the Rotten Tomatoes "top 100" page for one genre, once.

     The page URL is built from ``genre`` and the download is stored as
     ``<genre>.p``.  Nothing is downloaded when that file already exists.

     NOTE: the ``outdir`` argument is not used; it is immediately replaced
     by ``setDir(self.getDataDir())``, which is created when missing.

     Args:
         genre: Genre slug inserted into the URL and the file name.
         outdir: Ignored (see note above).
         debug: When truthy, print the target file before downloading.
     """
     outdir = setDir(self.getDataDir())
     if not isDir(outdir):
         mkDir(outdir)

     url = "https://www.rottentomatoes.com" + "/top/bestofrt/top_100_" + genre + "_movies/"
     savename = setFile(outdir, genre + ".p")

     # Already downloaded: nothing to do.
     if isFile(savename):
         return

     if debug:
         print("Downloading/Saving {0}".format(savename))
     getWebData(base=url, savename=savename, useSafari=False, dtime=10)
     # Fixed pause after each actual download.
     sleep(2)
Exemplo n.º 28
0
 def searchBoxOfficeMojo(self, movie, debug=False):
     """Print, per year, the saved entries whose title is nearest to ``movie``.

     Loads ``<self.name>.json`` from the results directory, which is
     iterated as a mapping of year to a sequence of ``(name, value)`` pairs.
     For every year the titles are passed to ``findNearest`` (``num=1``,
     ``cutoff=0.9``); years with no match print nothing.

     Args:
         movie: Title to look for.
         debug: Accepted for interface compatibility; currently unused.
     """
     resultsFile = setFile(self.getResultsDir(), "{0}.json".format(self.name))
     yearly = getFile(resultsFile)

     print("Nearest matches for {0}".format(movie))
     for year, entries in yearly.items():
         titles = [entry[0] for entry in entries]
         nearest = findNearest(movie, titles, num=1, cutoff=0.9)
         if len(nearest) == 0:
             continue

         # Keep the full (name, value) pairs for the matched titles.
         hits = []
         for name, value in entries:
             if name in nearest:
                 hits.append((name, value))
         print("{0: <6}{1}".format(year, hits))
Exemplo n.º 29
0
 def downloadFilms101YearlyData(self, year, outdir, debug=False):
     """Download the films101.com page for ``year`` unless already saved.

     The page is stored as ``<year>.p`` in ``outdir``.  The download is
     best-effort: a failure is reported (when ``debug`` is set) and the
     method returns without raising, so a batch over many years continues.

     Args:
         year: Year inserted into the URL and the file name.
         outdir: Directory the download is saved in.
         debug: When truthy, print progress and download failures.
     """
     url = "http://www.films101.com/y{0}r.htm".format(year)
     savename = setFile(outdir, "{0}.p".format(year))
     if isFile(savename):
         return

     if debug:
         print("Downloading/Saving {0}".format(savename))
     try:
         getWebData(base=url, savename=savename, useSafari=False)
     except Exception as err:
         # Deliberately non-fatal, but no longer a bare ``except``: that
         # also trapped KeyboardInterrupt/SystemExit, so a running batch
         # could not be interrupted during a download.
         if debug:
             print("Could not download {0}: {1}".format(url, err))
         return
     # Fixed pause after each successful download.
     sleep(2)
Exemplo n.º 30
0
    def processBoxOfficeMojo(self, debug=False):
        """Collapse the raw results into one ranked movie list per year.

        Reads ``results.json`` from the results directory.  The data is
        iterated as ``{year: [group, ...]}`` where each group is a sequence
        of rows; for every row, field 2 is the movie title and fields 9 and
        4 are money strings passed to ``convertCurrency`` (presumably total
        gross and weekly gross -- confirm against the producer of the file).

        A parenthesised number such as ``"(2017)"`` is removed from the
        title.  Each title keeps the largest amount seen for it within the
        year.  Per year, the titles are sorted by amount, highest first, the
        top 25 are printed, and the mapping is saved to ``<self.name>.json``
        in the results directory.

        Args:
            debug: Accepted for interface compatibility; currently unused.
        """
        outdir = self.getResultsDir()
        savename = setFile(outdir, "results.json")
        data = getFile(savename)

        # Raw string: "\(" and "\d" are invalid escape sequences in a plain
        # literal.  Compiled once instead of once per row.
        yearPattern = re.compile(r"\((\d+)\)")

        yearlyData = {}
        for year in sorted(data):
            yearMovies = {}
            for wdata in data[year]:
                for mdata in wdata:
                    movie = mdata[2]
                    retval = yearPattern.search(movie)
                    if retval:
                        # Remove the "(NNNN)" tag from the title.
                        movie = movie.replace(retval.group(), "").strip()

                    gross = convertCurrency(mdata[9])
                    weekly = convertCurrency(mdata[4])
                    money = max(gross, weekly)

                    previous = yearMovies.get(movie)
                    if previous is None:
                        yearMovies[movie] = money
                    else:
                        yearMovies[movie] = max(money, previous)

            yearlyData[year] = sorted(yearMovies.items(),
                                      key=operator.itemgetter(1),
                                      reverse=True)
            print("---->", year,
                  " (Top 25/{0} Movies) <----".format(len(yearlyData[year])))
            for item in yearlyData[year][:25]:
                print(item)
            print('\n')

        savename = setFile(outdir, "{0}.json".format(self.name))
        print("Saving", len(yearlyData), "yearly results to", savename)
        saveFile(savename, yearlyData)