Exemplo n.º 1
0
    def downloadKWorbSpotifyYouTubeArtists(self, update=False):
        """Read the cached KWorb YouTube archive page and print its table rows.

        When ``update`` is True the archive page is first re-downloaded to
        the local cache file. Every ``<table>`` in the page is then turned
        into a list of dicts (header text -> ``<td>`` element) which is
        printed. Nothing is returned.

        The per-artist crawling logic further down is switched off with
        ``if False:`` and never runs.
        """
        archiveURL = "https://kworb.net/youtube/archive.html"
        archiveFile = "kworb_youtubeartists.p"
        if update is True:
            self.dutils.downloadArtistURL(url=archiveURL,
                                          savename=archiveFile,
                                          force=True)

        bsdata = getHTML(archiveFile)
        artistDir = self.disc.getArtistsDir()
        saveDir = setDir(artistDir, "youtube")
        print(artistDir)

        # One dict per body row; the first <tr> of each table is skipped
        # (treated as the header row).
        data = []
        for table in bsdata.findAll("table"):
            headers = [th.text for th in table.findAll("th")]
            rows = table.findAll("tr")[1:]
            data.extend(dict(zip(headers, row.findAll("td"))) for row in rows)

        print(data)

        # Disabled crawler: kept for reference, never executed.
        if False:
            bsdata = getHTML(archiveFile)
            artistDir = self.disc.getArtistsDir()
            saveDir = setDir(artistDir, "youtube")

            # Step 1: report which archive sub-pages are already cached.
            for div in bsdata.findAll("div", {"class": "subcontainer"}):
                if div.find("span", {"class": "pagetitle"}) is None:
                    continue
                for ref in div.findAll("a"):
                    href = ref.attrs['href']
                    pageURL = "{0}/{1}".format(self.youtubeURL, href)
                    pageFile = "{0}/{1}".format(saveDir,
                                                href.replace(".html", ".p"))
                    marker = "Y\t" if isFile(pageFile) else "-\t"
                    print(marker, pageFile, '\t', pageURL)
                    # NOTE: the actual download of missing pages was already
                    # commented out in the original code.

            # Step 2: fetch the first missing artist page of every table.
            for ifile in findExt(saveDir, ".p"):
                pageData = getHTML(ifile)
                for table in pageData.findAll("table"):
                    for tr in table.findAll("tr")[1:]:
                        ref = tr.find("a")
                        href = ref.attrs['href']
                        name = ref.text
                        artistURL = "{0}/{1}".format(self.youtubeURL, href)
                        artistFile = "{0}/{1}".format(
                            setDir(saveDir, "artist"),
                            href.replace(".html", ".p"))
                        print(artistURL, artistFile)

                        if isFile(artistFile) is False:
                            page, code = downloadURL(artistURL)
                            from ioUtils import getFile, saveFile
                            saveFile(idata=page, ifile=artistFile)
                            sleep(3)
                            break
Exemplo n.º 2
0
    def assertDBModValExtraData(self, modVal, minPages=1, maxPages=None, allowMulti=False, test=True, clean=True):
        """Walk the multi-page artists of one modVal DB and (re)fetch their pages.

        The DB file ``<modVal>-DB.p`` is loaded and every artist whose
        ``pages.more`` flag is True is visited. For each page from 1 up to
        ``pages.pages`` (capped by ``maxPages`` when given):

        * with ``clean`` True an existing cached file is removed,
        * with ``test`` True the URL is only reported, nothing is downloaded,
        * otherwise a missing file is downloaded, followed by a 3 s pause.

        Artists with fewer than ``minPages`` pages are skipped.
        ``allowMulti`` is accepted but not used here.

        NOTE(review): with the default arguments (``test=True, clean=True``)
        cached files are removed even though nothing is downloaded afterwards;
        confirm that this is intended.
        """
        print("assertDBModValExtraData(",modVal,")")
        dbFile = setFile(self.disc.getArtistsDBDir(), "{0}-DB.p".format(modVal))
        dbdata = getFile(dbFile)

        for artistID, artistData in dbdata.items():
            pages = artistData.pages
            if pages.more is not True:
                continue

            lastPage = pages.pages
            if lastPage < minPages:
                continue
            if maxPages is not None:
                lastPage = min(lastPage, maxPages)
            artistRef = artistData.url.url

            # The artist summary line is printed once, before its first download.
            summaryPending = True
            for p in range(1, lastPage + 1):
                # Page 1 uses the plain URL/savename; later pages pass the page number.
                pageArgs = () if p == 1 else (p,)
                url = self.getArtistURL(artistRef, *pageArgs)
                savename = self.dutils.getArtistSavename(artistID, *pageArgs)
                print("\t---> {0} / {1}   {2}".format(p, pages.pages, url))

                if clean is True and isFile(savename):
                    print("Removing {0}".format(savename))
                    removeFile(savename)

                if test is True:
                    print("\t\tWill download: {0}".format(url))
                    print("\t\tJust testing... Will not download anything.")
                    continue

                if isFile(savename):
                    continue

                if summaryPending:
                    print("{0: <20}{1: <10}{2}".format(artistID, pages.tot, artistData.artist.name))
                    summaryPending = False

                progress = "{0}/{1}".format(p, pages.pages)
                print("{0: <20}{1: <10}{2}".format(artistID, progress, url))
                self.dutils.downloadArtistURL(url=url, savename=savename, force=True)
                sleep(3)
Exemplo n.º 3
0
Arquivo: mp3id.py Projeto: tgadf/mp3id
    def __init__(self, mp3=None, debug=False, allowMissing=True, test=False):
        """Create the tag reader, optionally bound to an mp3 file.

        Args:
            mp3: Path to an mp3 file, or None to bind one later.
            debug: Stored on the instance as ``self.debug``.
            allowMissing: Stored on the instance as ``self.allowMissing``.
            test: Stored on the instance as ``self.test``.

        Raises:
            ValueError: If ``mp3`` is given but is not an accessible file, or
                its extension is not one of ``self.mp3exts``.

        NOTE(review): several ID3 frames share a label in ``self.tags``
        ('Album', 'Composer', 'Artist', 'Title'), so the reversed
        ``self.id3Map`` keeps only the last frame for each of those labels
        (TSOA, TSOC, TSOP, TSOT).
        """
        self.mp3exts = [".mp3", ".MP3", ".Mp3"]

        # Validate the path up front so a bad argument fails fast.
        if mp3 is not None and not isFile(mp3):
            raise ValueError("Could not access {0}".format(mp3))
        if mp3 is not None and getExt(mp3) not in self.mp3exts:
            raise ValueError("This is not an mp3")

        self.mp3, self.debug = mp3, debug
        self.allowMissing, self.test = allowMissing, test

        # ID3 frame name -> human readable label.
        self.tags = {
            'TALB': 'Album',
            'TBPM': 'BPM',
            'TCMP': 'Compilation',
            'TCOM': 'Composer',
            'TCOP': 'Copyright',
            'TENC': 'EncodedBy',
            'TEXT': 'Lyricist',
            'TIT2': 'Title',
            'TIT3': 'Version',
            'TLEN': 'Length',
            'TMED': 'Media',
            'TMOO': 'Mood',
            'TOLY': 'Author',
            'TPE1': 'Artist',
            'TPE2': 'Performer',
            'TPE3': 'Conductor',
            'TPE4': 'Arranger',
            'TPOS': 'DiscNumber',
            'TPUB': 'Organization',
            'TRCK': 'TrackNumber',
            'TSO2': 'AlbumArtist',
            'TSOA': 'Album',
            'TSOC': 'Composer',
            'TSOP': 'Artist',
            'TSOT': 'Title',
            'TSRC': 'Isrc',
            'TSST': 'DiscSubtitle'
        }

        # Label -> frame name; later frames overwrite earlier ones.
        self.id3Map = {}
        for frame, label in self.tags.items():
            self.id3Map[label] = frame

        self.tagsEasyID3 = {}
        self.tagsID3 = {}

        if isFile(self.mp3):
            self.setMP3(self.mp3)
Exemplo n.º 4
0
    def downloadUltimateMovieRankingsYearlyData(self,
                                                year,
                                                outdir,
                                                debug=False):
        """Download the Ultimate Movie Rankings page for ``year`` into ``outdir``.

        The page is saved as ``<year>.p``. Nothing happens when that file
        already exists. The download is best effort: a failure is reported
        (in debug mode) and otherwise ignored.

        Args:
            year: Year used in the URL and in the output filename.
            outdir: Directory that receives ``<year>.p``.
            debug: Print progress and failure messages when truthy.
        """
        # Only this URL pattern is used. The original code also assigned the
        # following slugs, each immediately overwritten by the next one:
        #   {year}-top-box-office-movies, top-grossing-movies-of-{year},
        #   {year}-movies, {year}-top-grossing-movies,
        #   biggest-box-office-hits-of-{year}, top-grossing-{year}-movies,
        #   ranking-{year}-movies
        url = "https://www.ultimatemovierankings.com/best-worst-movies-{0}/".format(
            year)

        savename = setFile(outdir, str(year) + ".p")
        if isFile(savename):
            return
        if debug:
            print("Downloading/Saving {0}".format(savename))
        try:
            getWebData(base=url, savename=savename, useSafari=False)
        except Exception as err:
            # Best effort: keep going, but do not swallow KeyboardInterrupt or
            # SystemExit the way a bare ``except:`` would.
            if debug:
                print("Failed to download {0}: {1}".format(url, err))
            sleep(0.2)
        else:
            sleep(2)
Exemplo n.º 5
0
    def getArtistModValFiles(self, modVal, previousDays=5, force=False):
        """Return the artist files of one modVal directory that need parsing.

        All ``.p`` files are returned when the modVal DB file does not exist
        or ``force`` is True. Otherwise the result is the union of the files
        modified less than ``previousDays`` days ago and the files modified
        after the DB file was last written.

        Args:
            modVal: Bucket identifier; names both the artist sub-directory
                and the ``<modVal>-DB.p`` file.
            previousDays: Age limit, in whole days, for "new" files.
            force: When True, ignore the DB timestamp and return every file.

        Returns:
            list: File paths to parse (unordered when built from the union).
        """
        artistDir = self.disc.getArtistsDir()
        self.disc.getMaxModVal()  # result is not used; call kept as in the original
        artistDBDir = self.disc.getArtistsDBDir()

        files = findExt(setDir(artistDir, str(modVal)), ext='.p')
        dbname = setFile(artistDBDir, "{0}-DB.p".format(modVal))
        now = datetime.now()

        # Reference timestamp; None means "parse everything".
        lastModified = None
        if isFile(dbname) and force is not True:
            lastModified = datetime.fromtimestamp(path.getmtime(dbname))

        if lastModified is None:
            print("  ===> Parsing all {0} files for modval {1}".format(len(files), modVal))
            return files

        def modifiedAt(ifile):
            return datetime.fromtimestamp(path.getmtime(ifile))

        youngFiles = [ifile for ifile in files if (now - modifiedAt(ifile)).days < previousDays]
        changedFiles = [ifile for ifile in files if modifiedAt(ifile) > lastModified]
        newFiles = list(set(youngFiles).union(set(changedFiles)))
        print("  ===> Found new {0} files (< {1} days) to parse for modval {2}".format(len(newFiles), previousDays, modVal))
        return newFiles
Exemplo n.º 6
0
    def createRawOscarData(self, debug=True):
        """Merge the parsed yearly Oscar files with saved corrections into raw.yaml.

        Every ``.json`` file in the wiki results directory is loaded, keyed
        by its base filename. When ``saved.yaml`` exists in the corrections
        directory, its "Winner" / "Nominees" entries overwrite the parsed
        values for the same year and title. The merged data is written to
        ``raw.yaml`` in the corrections directory.

        NOTE(review): a year or title present in saved.yaml but missing from
        the parsed data raises KeyError.
        """
        print("Checking for poorly parsed oscar data.")
        files = sorted(findExt(self.wikiData.getResultsDir(), ext=".json"))
        if debug:
            print("Found {0} oscar files".format(len(files)))
        yearlyData = {getBaseFilename(ifile): getFile(ifile) for ifile in files}

        correctionsFile = setFile(self.getCorrectionsDir(), "saved.yaml")
        savedData = getFile(correctionsFile) if isFile(correctionsFile) else {}

        # Apply the manual corrections on top of the parsed data.
        for year, yearFixes in savedData.items():
            for title, titleFixes in yearFixes.items():
                savedWinner = titleFixes.get("Winner")
                savedNominees = titleFixes.get("Nominees")
                if savedWinner is not None:
                    print("Overwritting {0} {1} winner".format(year, title))
                    yearlyData[year][title]["Winner"] = savedWinner
                if savedNominees is not None:
                    print("Overwritting {0} {1} nominees".format(year, title))
                    yearlyData[year][title]["Nominees"] = savedNominees

        rawFile = setFile(self.getCorrectionsDir(), "raw.yaml")
        saveFile(idata=yearlyData, ifile=rawFile)
Exemplo n.º 7
0
    def downloadMissingArtistUnofficial(self):
        """Download the "unofficial" page of every artist in ``self.metadata``.

        ``self.metadata`` is iterated as ``{modVal: {artistID: pageData}}``
        where ``pageData`` provides "Name" and "URL". Artists whose
        unofficial file already exists are skipped. A failed download is
        reported and the loop continues with the next artist.
        """
        ts = timestat("Downloading Missing Artist Unofficial Files")
        for modVal, modValData in self.metadata.items():
            N = len(modValData)
            tsMod = timestat(
                "Downloading {0} Missing Artist Unofficial Files For ModVal={1}"
                .format(N, modVal))
            for i, (artistID, artistPageData) in enumerate(modValData.items()):
                artistName = artistPageData["Name"]
                artistURL = artistPageData["URL"]

                print("=" * 100)
                print("{0}/{1}:  [{2}] / [{3}]".format(i, N, artistName,
                                                       artistURL))
                url = self.dbArtists.getArtistURL(artistURL, unofficial=True)
                savename = self.dutils.getArtistSavename(artistID,
                                                         unofficial=True)

                if isFile(savename):
                    continue

                try:
                    self.dutils.downloadArtistURL(url, savename)
                except Exception as err:
                    # Best effort per artist. ``Exception`` (not a bare except)
                    # so that Ctrl-C still aborts the whole run.
                    print("Error downloading {0}".format(url))
                    print("  ---> {0}".format(err))

            tsMod.stop()
        ts.stop()
Exemplo n.º 8
0
 def __init__(self, debug=False):
     """Set up the DatPiff handler and verify that its data locations exist.

     Creates the DB base, artist parser, utilities and multi-artist helper,
     then checks the artists directory, its "known" sub-directory and the
     ``datPiffKnown.p`` file inside it before delegating to the parent
     initializer.

     Raises:
         ValueError: If the artists directory, the known directory or the
             known file is missing.
     """
     self.db = "DatPiff"
     self.disc = dbBase(self.db.lower())
     self.artist = artistDP(self.disc)
     self.dutils = datpiffUtils()
     self.dutils.setDiscogs(self.disc)
     self.debug = debug

     ## MultiArtist
     self.mulArts = multiartist()

     # Fail early when the expected on-disk layout is not available.
     print("DatPiff ArtistsDir: {0}".format(self.disc.getArtistsDir()))
     if not isDir(self.disc.getArtistsDir()):
         raise ValueError("Could not find artist dir for DatPiff")

     self.knownDir = setDir(self.disc.getArtistsDir(), "known")
     if not isDir(self.knownDir):
         print("Make sure that Piggy is loaded!!!")
         knownDirError = "Could not find known [{0}] dir for DatPiff"
         raise ValueError(knownDirError.format(self.knownDir))

     self.knownFile = setFile(self.knownDir, "datPiffKnown.p")
     if not isFile(self.knownFile):
         knownFileError = "Known File [{0}] does not exist"
         raise ValueError(knownFileError.format(self.knownFile))

     self.baseURL = "https://www.datpiff.com/"
     self.searchURL = "https://www.datpiff.com/mixtapes-search?"

     super().__init__(self.db, self.disc, self.artist, self.dutils, debug=debug)
Exemplo n.º 9
0
    def downloadMissingArtistExtras(self, maxPages=None):
        """Download the missing extra pages of every artist in ``self.metadata``.

        ``self.metadata`` is iterated as ``{modVal: {artistID: pageData}}``
        where ``pageData`` provides "Name", "URL" and "Pages". Page indexes
        ``0 .. Pages-1`` are visited; pages whose file already exists are
        skipped. A failed download is reported and the loop continues.

        Args:
            maxPages: When not None, page indexes greater than ``maxPages``
                are not processed (indexes ``0 .. maxPages`` still are).
        """
        ts = timestat("Downloading Missing Artist Extra Files")
        for modVal, modValData in self.metadata.items():
            N = len(modValData)
            tsMod = timestat("Downloading {0} Missing Artist Extra Files For ModVal={1}".format(N, modVal))
            for i, (artistID, artistPageData) in enumerate(modValData.items()):
                artistName = artistPageData["Name"]
                artistURL = artistPageData["URL"]
                pages = artistPageData["Pages"]
                print("=" * 100)
                print("{0}/{1}:  [{2}] / [{3}]".format(i, N, artistName, artistURL))
                for page in range(pages):
                    # Page indexes only grow, so stop at the first one past the
                    # limit instead of skipping each remaining page one by one.
                    if maxPages is not None and page > maxPages:
                        break
                    url = self.dbArtists.getArtistURL(artistURL, page=page)
                    savename = self.dutils.getArtistSavename(artistID, page=page)
                    if isFile(savename):
                        continue

                    print("{0}/{1}:  [{2}] / [{3}] / [{4}-{5}]".format(i, N, artistName, artistURL, page, pages))

                    try:
                        self.dutils.downloadArtistURL(url, savename)
                    except Exception as err:
                        # Best effort per page. ``Exception`` (not a bare
                        # except) so that Ctrl-C still aborts the run.
                        print("Error downloading {0}".format(url))
                        print("  ---> {0}".format(err))

            tsMod.stop()
        ts.stop()
Exemplo n.º 10
0
    def getDBData(self, modVal, force=False, debug=False):
        """Return the previously stored DB data for ``modVal``, or ``{}``.

        ``force`` is only honoured when none of ``self.credit``,
        ``self.extra``, ``self.song`` and ``self.composition`` is True. A
        missing DB file always behaves like a forced reload.

        Args:
            modVal: Bucket identifier used to locate the DB file.
            force: Request an empty result instead of the stored data.
            debug: Print what is being loaded.

        Returns:
            dict: The stored data (a pandas Series is converted to a dict),
            or an empty dict when a reload is forced.
        """
        dbname = self.disc.getArtistsDBModValFilename(modVal)

        # The partial modes update existing data, so they never start from scratch.
        partialMode = (self.credit is True or self.extra is True
                       or self.song is True or self.composition is True)
        localForce = False if partialMode else force
        if isFile(dbname) is False:
            localForce = True

        if localForce is not False:
            print("  ===> Forcing Reloads of ModVal={0}".format(modVal))
            return {}

        if debug:
            print("Loading {0}".format(dbname))
        dbdata = fileIO().get(dbname)
        if isinstance(dbdata, Series):
            dbdata = dbdata.to_dict()
        if debug:
            print("  ===> Found {0} previous data for ModVal={1}".format(
                len(dbdata), modVal))
        return dbdata
Exemplo n.º 11
0
 def downloadWikiFilmYearlyData(self, year, outdir, debug=False):
     """Download Wikipedia's "<year> in film" page to ``<outdir>/<year>.p``.

     Does nothing when the output file already exists. Pauses one second
     after a download.
     """
     savename = setFile(outdir, "{0}.p".format(str(year)))
     wikiURL = "https://en.wikipedia.org/wiki/{0}_in_film".format(year)
     if isFile(savename):
         return

     if debug:
         print("Downloading {0}".format(wikiURL))
     getWebData(base=wikiURL, savename=savename, useSafari=False)
     sleep(1)
Exemplo n.º 12
0
 def downloadRottenTomatoesYearlyData(self, year, outdir, debug=False):
     """Download the Rotten Tomatoes best-of page for ``year`` to ``<outdir>/<year>.p``.

     Does nothing when the output file already exists.
     """
     rtURL = "https://www.rottentomatoes.com/top/bestofrt/?year={0}".format(str(year))
     savename = setFile(outdir, "{0}.p".format(year))
     if isFile(savename):
         return

     if debug:
         print("Downloading/Saving {0}".format(savename))
     getWebData(base=rtURL, savename=savename, useSafari=False)
Exemplo n.º 13
0
Arquivo: mp3id.py Projeto: tgadf/mp3id
    def setMP3(self, mp3):
        """Bind this reader to ``mp3`` and load its tags.

        Raises:
            ValueError: If ``mp3`` is not an accessible file.
        """
        if not isFile(mp3):
            raise ValueError("Could not access {0}".format(mp3))

        self.mp3 = mp3
        self.findEasyTags()
        self.findID3Tags()
Exemplo n.º 14
0
 def installData(self):
     """Create the music data directory and seed its data file if missing.

     The non-local data file is created by copying the content of the
     local one.
     """
     if not isDir(self.musicDataDir):
         print("Install: Making Prefix Dir [{0}]".format(self.musicDataDir))
         mkDir(self.musicDataDir)

     if isFile(self.getFilename(local=False)):
         return

     print("Install: Creating Prefix Data From Local Data")
     localData = fileIO().get(self.getFilename(local=True))
     fileIO().save(idata=localData, ifile=self.getFilename(local=False))
Exemplo n.º 15
0
 def installData(self):
     """Create the multi-artist directory and build the main pickle if missing.

     The main pickle is generated from the local YAML via
     ``writeToMainPickleFromLocalYAML``.
     """
     if not isDir(self.multiArtistDir):
         print("Install: Making Prefix Dir [{0}]".format(self.multiArtistDir))
         mkDir(self.multiArtistDir)

     if isFile(self.getFilename(fast=True, local=False)):
         return

     print("Install: Creating Prefix Data From Local Data")
     self.writeToMainPickleFromLocalYAML()
Exemplo n.º 16
0
 def getCombinedMovies(self, debug=False):
     """Load and return the combined movie data from ``movies.json``.

     The file is looked up in the results directory of ``self.combine``.

     Args:
         debug: Print the number of loaded entries when truthy.

     Raises:
         ValueError: If ``movies.json`` cannot be accessed.
     """
     savename = setFile(self.combine.getResultsDir(), "movies.json")
     if not isFile(savename):
         # Was misspelled ``ValueErrro``, which raised NameError instead.
         raise ValueError("Cannot access {0}".format(savename))
     combinedMovies = getFile(savename)
     if debug:
         print("Found {0} combined movies".format(len(combinedMovies)))
     return combinedMovies
Exemplo n.º 17
0
 def getMyMovies(self, debug=False):
     """Load and return the content of ``mymovies.json`` from the data directory.

     Raises:
         ValueError: If the file cannot be accessed.
     """
     myMoviesFile = setFile(self.getDataDir(), "mymovies.json")
     if isFile(myMoviesFile):
         mine = getFile(myMoviesFile)
     else:
         raise ValueError("Cannot access {0}".format(myMoviesFile))

     if debug:
         print("Found {0} my movies".format(len(mine)))
     return mine
Exemplo n.º 18
0
    def setMusic(self, file):
        """Bind this reader to ``file``, detect its format and load its tags.

        The extension is matched against the instance's extension lists
        (FLAC, MP3, M4A, ASF, OGG, AIFF, WAV, skips) and the matching
        ``self.is*`` / ``self.skip`` flag is set. The tag finder of every
        format whose flag is True is then called.

        Args:
            file: Path of the music file.

        Raises:
            ValueError: If ``file`` is not an accessible file, or its format
                cannot be determined.
        """
        if not isFile(file):
            # The original formatted the undefined name ``ifile`` here, which
            # raised NameError instead of this ValueError.
            raise ValueError("Could not access {0}".format(file))

        self.file = file
        ext = getExt(file)  # computed once instead of once per branch
        if ext in self.flacExts:
            self.isFLAC = True
            if self.debug:
                print("  File is FLAC")
        elif ext in self.mp3Exts:
            self.isMP3 = True
            if self.debug:
                print("  File is MP3")
        elif ext in self.m4aExts:
            self.isM4A = True
            if self.debug:
                print("  File is M4A")
        elif ext in self.asfExts:
            self.isASF = True
            if self.debug:
                print("  File is ASF (WMA)")
        elif ext in self.oggExts:
            self.isOGG = True
            if self.debug:
                print("  File is OGG")
        elif ext in self.aiffExts:
            self.isAIFF = True
            if self.debug:
                print("  File is AIFF")
        elif ext in self.wavExts:
            self.isWAV = True
            if self.debug:
                print("  File is WAV")
        elif ext in self.skips:
            self.skip = True
        elif ".DS_Store" in file:
            self.skip = True
        else:
            raise ValueError(
                "Could not determine format for [{0}] with extention [{1}]"
                .format(file, ext))

        # Only the "easy" tags are read for MP3 files.
        if self.isMP3 is True:
            self.findEasyTags()
        if self.isFLAC is True:
            self.findFlacTags()
        if self.isM4A is True:
            self.findM4ATags()
        if self.isASF is True:
            self.findASFTags()
        if self.isOGG is True:
            self.findOGGTags()
        if self.isAIFF is True:
            self.findAIFFTags()
        if self.isWAV is True:
            self.findWAVTags()
Exemplo n.º 19
0
Arquivo: SAG.py Projeto: tgadf/movies
    def downloadSAGCategoryData(self, category, outdir, debug=False):
        """Download the Wikipedia page of one SAG award category.

        The page is saved as ``<outdir>/<category>.p``. Does nothing when
        that file already exists. Pauses one second after a download.
        """
        savename = setFile(outdir, category + ".p")
        wikiURL = "https://en.wikipedia.org/wiki/Screen_Actors_Guild_Award_for_{0}".format(category)
        if isFile(savename):
            return

        if debug:
            print("Downloading {0}".format(wikiURL))
        getWebData(base=wikiURL, savename=savename, useSafari=False)
        sleep(1)
Exemplo n.º 20
0
    def downloadMainArtists(self, force=False, debug=False, sleeptime=2):
        """Download every artist page linked from the cached main page.

        The main page is parsed and each ``<li>`` in the categories sidebar
        is inspected. Entries whose URL slug is a number or contains
        "parent-category-ii", and entries whose class suffix is not an
        integer ID, are skipped. The remaining artists are downloaded
        unless their file already exists and ``force`` is False.

        Args:
            force: Download even when the artist file already exists.
            debug: Accepted for interface compatibility; not used here.
            sleeptime: Accepted for interface compatibility; not used here.

        Raises:
            ValueError: If the categories block, a list item's class, its
                link or the link's href cannot be found.
        """
        mainSavename = self.getMainSavename()

        ## Parse data
        bsdata = getHTML(mainSavename)

        ## Find and Download Artists
        categories = bsdata.find("div",
                                 {"class": "sidebar-widget widget_categories"})
        if categories is None:
            raise ValueError("Cannot find categories!")

        for ul in categories.findAll("ul"):
            for i, li in enumerate(ul.findAll("li")):
                try:
                    catitem = li.attrs["class"][1]
                except (KeyError, IndexError, TypeError):
                    raise ValueError(
                        "Cannot find list class item: {0}".format(li))
                ref = li.find("a")
                if ref is None:
                    raise ValueError("Cannot find list link!")
                try:
                    href = ref.attrs['href']
                except KeyError:
                    raise ValueError("Cannot find list href!")

                # check for artist: numeric slugs and parent categories are not artists
                artistName = href.split('/')[-2]
                try:
                    int(artistName)
                except ValueError:
                    pass
                else:
                    continue
                if "parent-category-ii" in artistName:
                    continue

                # get artist ID: the last dash-separated part of the class item
                artistID = catitem.split('-')[-1]
                try:
                    int(artistID)
                except ValueError:
                    continue

                # Resolve this artist's file BEFORE the existence check. The
                # original checked a stale name (the main page file on the
                # first pass, the previous artist afterwards).
                savename = self.getArtistSavename(artistID)
                if force is False and isFile(savename):
                    print("{0} exists.".format(savename))
                    continue

                print(i, '\t', artistID, '\t', artistName, '\t', savename)
                self.downloadArtistURL(url=href, savename=savename, parse=False)
Exemplo n.º 21
0
    def parseSearchArtist(self,
                          artist,
                          data,
                          maxArtists=99,
                          force=False,
                          debug=False):
        """Parse a search-result page and download the artists it lists.

        Artist links are collected from ``ul.search-results > li.artist >
        div.name``. Up to ``maxArtists`` distinct hrefs are then downloaded;
        existing files are skipped unless ``force`` is set.

        Args:
            artist: Search term; stored as "Name" for every collected href.
            data: Raw page content, or None (nothing is done then).
            maxArtists: Maximum number of artists to process.
            force: Re-download artists whose file already exists.
            debug: Not used here; the debug print depends on ``self.debug``.

        Returns:
            None.
        """
        if data is None:
            return None

        from json import loads

        ## Parse data
        bsdata = getHTML(data)

        # href -> {"N": number of occurrences, "Name": search term}
        artistDB = {}
        for ul in bsdata.findAll("ul", {"class": "search-results"}):
            for li in ul.findAll("li", {"class": "artist"}):
                for div in li.findAll("div", {"class": "name"}):
                    link = div.find("a")
                    href = link.attrs['href']
                    tooltip = link.attrs['data-tooltip']
                    # The tooltip ID is decoded but not used further.
                    try:
                        tooltip = loads(tooltip)
                        artistID = tooltip['id']
                    except:
                        artistID = None

                    entry = artistDB.get(href)
                    if entry is None:
                        entry = artistDB[href] = {"N": 0, "Name": artist}
                    entry["N"] += 1

        if self.debug:
            print("Found {0} artists".format(len(artistDB)))

        for iArtist, href in enumerate(artistDB, start=1):
            if iArtist > maxArtists:
                break

            discID = self.dutils.getArtistID(href)
            url = self.getArtistURL(href)
            savename = self.dutils.getArtistSavename(discID)

            print(iArtist, '/', len(artistDB), '\t:', discID, '\t', url)

            if isFile(savename) and force is False:
                continue

            self.dutils.downloadArtistURL(url, savename, force=force)
Exemplo n.º 22
0
    def downloadArtistUnofficialURL(self, artistData, debug=False, force=False):
        """Download the "unofficial" page of one artist.

        Args:
            artistData: Object providing ``url.url`` and ``ID.ID``.
            debug: Not used here.
            force: Download even when the file already exists.

        Returns:
            The result of ``downloadArtistURL`` when a download is attempted,
            otherwise False.
        """
        artistRef = artistData.url.url
        artistID = artistData.ID.ID
        print("Downloading credit URL for ArtistID {0}".format(artistID))

        url = self.getArtistURL(artistRef, unofficial=True)
        savename = self.getArtistSavename(artistID, unofficial=True)

        # Nothing to do when the file is cached and no refresh was requested.
        if isFile(savename) and force is not True:
            return False
        return self.downloadArtistURL(url=url, savename=savename, force=force)
Exemplo n.º 23
0
 def downloadRottenTomatoesTop100Data(self, genre, outdir, debug=False):
     """Download the Rotten Tomatoes top-100 page of ``genre`` to ``<genre>.p``.

     Does nothing when the output file already exists. Pauses two seconds
     after a download.

     NOTE(review): the ``outdir`` argument is overwritten with the data
     directory before it is used, so the caller's value is ignored;
     confirm whether that is intended.
     """
     outdir = setDir(self.getDataDir())
     if not isDir(outdir):
         mkDir(outdir)

     url = "".join(["https://www.rottentomatoes.com",
                    "/top/bestofrt/top_100_", genre, "_movies/"])
     savename = setFile(outdir, genre + ".p")
     if isFile(savename):
         return

     if debug:
         print("Downloading/Saving {0}".format(savename))
     getWebData(base=url, savename=savename, useSafari=False, dtime=10)
     sleep(2)
Exemplo n.º 24
0
 def downloadFilms101YearlyData(self, year, outdir, debug=False):
     """Download the films101.com page for ``year`` to ``<outdir>/<year>.p``.

     Does nothing when the output file already exists. The download is
     best effort: on failure the method returns without pausing.
     Otherwise it pauses two seconds.

     Args:
         year: Year used in the URL and in the output filename.
         outdir: Directory that receives ``<year>.p``.
         debug: Print progress and failure messages when truthy.
     """
     url = "http://www.films101.com/y{0}r.htm".format(year)
     savename = setFile(outdir, "{0}.p".format(year))
     if isFile(savename):
         return

     if debug:
         print("Downloading/Saving {0}".format(savename))
     try:
         getWebData(base=url, savename=savename, useSafari=False)
     except Exception as err:
         # Best effort, but no bare ``except:``, so that Ctrl-C still
         # interrupts a long crawl.
         if debug:
             print("Failed to download {0}: {1}".format(url, err))
         return
     sleep(2)
Exemplo n.º 25
0
    def downloadTeamStandingsByYear(self, year, debug=False):
        """Download the college-football standings page of ``year``.

        The page is saved as ``<year>.p`` in the season directory. Does
        nothing when that file already exists. Pauses 10-12 seconds after a
        download.
        """
        yearName = str(year)
        url = join(self.getBase(), "college-football/standings/_/season", yearName)

        savename = setFile(self.getSeasonDir(), yearName + ".p")
        if not isFile(savename):
            if debug:
                print("Downloading {0}".format(url))
            getWebData(base=url, savename=savename, useSafari=False)
            sleep(10 + 2 * random())
Exemplo n.º 26
0
    def parseSearchArtist(self,
                          artist,
                          data,
                          maxArtists=99,
                          force=False,
                          debug=False):
        """Currently disabled: always returns None without doing anything.

        The unconditional ``return`` on the first line makes the rest of the
        body unreachable. The unreachable code would collect "/artist/" links
        from ``div.section`` elements of the search page and download up to
        ``maxArtists`` of them, skipping existing files unless ``force`` is set.
        """
        # Early exit: everything below is unreachable until this is removed.
        return
        if data is None:
            return None

        ## Parse data
        bsdata = getHTML(data)

        # href -> {"N": number of occurrences, "Name": link text}
        artistDB = {}

        for div in bsdata.findAll("div", {"class": "section"}):
            refs = div.findAll("a")
            for ref in refs:
                # Skip links that wrap an image.
                if ref.find("img") is not None:
                    continue
                href = ref.attrs['href']
                artist = ref.text

                # Only artist links are of interest.
                if href.startswith("/artist/") is False:
                    continue

                if artistDB.get(href) is None:
                    artistDB[href] = {"N": 0, "Name": artist}
                artistDB[href]["N"] += 1

        if self.debug:
            print("Found {0} artists".format(len(artistDB)))

        iArtist = 0
        for href, hrefData in artistDB.items():
            iArtist += 1
            # Stop once the requested number of artists has been processed.
            if iArtist > maxArtists:
                break

            discID = self.dutils.getArtistID(href)
            url = self.getArtistURL(href)
            savename = self.dutils.getArtistSavename(discID)

            print(iArtist, '/', len(artistDB), '\t:', discID, '\t', url)
            # Existing files are only re-downloaded when forced.
            if isFile(savename):
                if force is False:
                    continue

            self.dutils.downloadArtistURL(url, savename, force=force)
Exemplo n.º 27
0
    def downloadGameData(self, debug=False, verydebug=False):
        """Download the game data of every game of the 2013-2015 seasons.

        Each season result file is loaded; for seasons in 2013, 2014 or 2015
        every game of every team is passed to ``downloadGameDataByID``. The
        method sleeps five seconds before starting.

        Args:
            debug: Forwarded to ``downloadGameDataByID``.
            verydebug: Not used here.
        """
        seasonFiles = findExt(self.getSeasonResultsDir(), ext=".p", debug=False)

        gameType = "playbyplay"
        print("Sleeping for 5 seconds...")
        sleep(5)

        wantedYears = [2013, 2014, 2015]
        for ifile in seasonFiles:
            seasonData = getFile(ifile)
            year = seasonData.getYear()
            if year not in wantedYears:
                continue
            gamesDir = self.getYearlyGamesDir(year)

            for teamID, teamData in seasonData.teams.items():
                for gameData in teamData.games:
                    gameResult = gameData["Result"]
                    gameID = gameData["Game"].gameID

                    # Disabled import of previously stored HTML files;
                    # kept for reference, never executed.
                    if False:
                        prevLocation = "/Volumes/Seagate/Football/Games/Plays/{0}.html".format(
                            gameID)
                        if isFile(prevLocation):
                            savename = setFile(gamesDir,
                                               "{0}.p".format(gameID))
                            if not isFile(savename) or True:
                                data = open(prevLocation, "rb").read()
                                saveFile(idata=data,
                                         ifile=savename,
                                         debug=True)
                                continue
                        continue

                    self.downloadGameDataByID(gameID, year, debug)
Exemplo n.º 28
0
    def downloadTeamStatisticsDataByYear(self, idval, name, year, debug=False):
        """Download one team's statistics page for ``year``.

        The page is saved as ``<name>-<year>.p`` in the yearly statistics
        directory. Does nothing when that file already exists. Pauses 15-17
        seconds after a download.
        """
        teamPath = "college-football/team/stats/_/id/{0}/season".format(idval)
        url = join(self.getBase(), teamPath, str(year))

        savename = setFile(self.getYearlyStatisticsDir(year),
                           "{0}-{1}.p".format(name, year))
        if not isFile(savename):
            if debug:
                print("Downloading {0} to {1}".format(url, savename))
            getWebData(base=url, savename=savename, useSafari=False)
            sleep(15 + 2 * random())
Exemplo n.º 29
0
    def getDBData(self, dbname, prefix, returnName=False, debug=False):
        """Return the content (or just the path) of ``<prefix><dbname>.p``.

        Args:
            dbname: Database name, used in the filename.
            prefix: Filename prefix.
            returnName: When True, return the file path without checking or
                loading the file.
            debug: Forwarded to ``getFile``; printing depends on ``self.debug``.

        Raises:
            ValueError: If the file does not exist (and ``returnName`` is
                not True).
        """
        filename = "{0}{1}.p".format(prefix, dbname)
        savename = setFile(self.getDiscogDBDir(), filename)
        if self.debug is True:
            print("Data stored in {0}".format(savename))

        if returnName is True:
            return savename

        if isFile(savename):
            if self.debug:
                print("Returning data from {0}".format(savename))
            return getFile(savename, debug=debug)
        raise ValueError("Could not find {0}".format(savename))
Exemplo n.º 30
0
    def downloadUnknownArtistCompositions(self):
        """Try to download the "songs/all" page of artists without a song file.

        For every artist in ``self.metadata`` whose song file is missing, the
        last path segment of its URL is replaced by ``songs/all`` and the page
        is fetched. The page is saved only when the response code is 200 and
        it contains a ``th.title-composer`` element. Otherwise the artist ID
        is collected, and all collected IDs are finally passed to
        ``updateMasterIgnoreCompositionData``. Every attempt is followed by a
        3 s pause.

        NOTE(review): the ``i == 20`` stop is only evaluated after a failed
        attempt; it looks like a leftover debugging limit.
        """
        newIgnores = []
        for modVal, modValMetadata in self.metadata.items():
            N = len(modValMetadata)
            ts = timestat(
                "Downloading {0} Unknown Composition Files For ModVal={1}".
                format(N, modVal))
            for i, (artistID, artistIDData) in enumerate(modValMetadata.items()):
                savename = self.dutils.getArtistSavename(artistID, song=True)

                href = artistIDData["URL"]
                artist = artistIDData["Name"]
                if isFile(savename):
                    continue

                ## Replace /credits with /songs
                parts = href.split('/')[:-1]
                href = "/".join(parts + ["songs", "all"])

                ## Create Full URL
                url = urllib.parse.urljoin(self.dbArtists.baseURL, href)
                print("\n")
                print("=" * 100)
                print("{0}/{1}:  [{2}] / [{3}]".format(i, N, artist, url))

                data, response = self.dutils.downloadURL(url)

                # A page only counts when it carries a composer column.
                saved = False
                if response == 200:
                    composerCells = getHTML(data).findAll(
                        "th", {"class": "title-composer"})
                    if len(composerCells) > 0:
                        print("  ---> Saving Data To {0}".format(savename))
                        saveFile(idata=data, ifile=savename)
                        saved = True

                sleep(3)
                if saved:
                    continue

                newIgnores.append(artistID)
                if i == 20:
                    break
            ts.stop()

        print("New IDs To Ignore")
        print(newIgnores)
        tsUpdate = timestat(
            "Adding {0} ArtistIDs To Master Composition Ignore List".format(
                len(newIgnores)))
        self.updateMasterIgnoreCompositionData(newIgnores)
        tsUpdate.stop()