def getPaths(
    fileExtensions=None,
    expandExtensions=True,
    name=('local', 'core', 'virtual'),
):
    '''
    Collect file paths from any combination of the core, local, and
    virtual corpora.

    This is the single public entry point for gathering corpus paths.

    `name` is any container (or string) that is tested with ``in`` for
    the labels 'core', 'local', and 'virtual'; each label that is found
    causes the matching corpus to be queried.  `fileExtensions` and
    `expandExtensions` are handed unchanged to every queried corpus.

    The returned list always holds core paths first, then local paths,
    then virtual paths, no matter how the labels are ordered in `name`.
    '''
    # Identical keyword arguments go to each corpus that is consulted.
    query = {
        'fileExtensions': fileExtensions,
        'expandExtensions': expandExtensions,
    }

    collected = []
    if 'core' in name:
        collected.extend(corpora.CoreCorpus().getPaths(**query))
    if 'local' in name:
        collected.extend(corpora.LocalCorpus().getPaths(**query))
    if 'virtual' in name:
        collected.extend(corpora.VirtualCorpus().getPaths(**query))
    return collected
def fromName(name):
    '''
    Build the corpus object that corresponds to `name`:

    >>> corpus.manager.fromName('core')
    <music21.corpus.corpora.CoreCorpus>

    >>> corpus.manager.fromName('virtual')
    <music21.corpus.corpora.VirtualCorpus>

    >>> corpus.manager.fromName('local')
    <music21.corpus.corpora.LocalCorpus: 'local'>

    >>> corpus.manager.fromName(None)
    <music21.corpus.corpora.LocalCorpus: 'local'>

    Any other name produces a named local corpus.  Such a corpus
    probably does not exist on disk yet, but it is ready to have paths
    added to it and to be stored on disk.

    >>> corpus.manager.fromName('testDummy')
    <music21.corpus.corpora.LocalCorpus: 'testDummy'>
    '''
    if name == 'core':
        return corpora.CoreCorpus()
    if name == 'virtual':
        return corpora.VirtualCorpus()
    if name == 'local':
        return corpora.LocalCorpus()
    # Everything else (None included) is passed on as a local corpus name.
    return corpora.LocalCorpus(name=name)
def iterateCorpora(returnObjects=True):
    '''
    Generator that walks over every corpus, yielding either corpus
    objects or corpus names; intended for searches that span all corpora.

    The core corpus comes first, the virtual corpus second, and then one
    entry for each name reported by `listLocalCorporaNames()`.

    This test will only show the first three, because it needs to run
    the same on every system:

    >>> for i, corpusObject in enumerate(corpus.manager.iterateCorpora()):
    ...     print(corpusObject)
    ...     if i == 2:
    ...        break
    <music21.corpus.corpora.CoreCorpus>
    <music21.corpus.corpora.VirtualCorpus>
    <music21.corpus.corpora.LocalCorpus: 'local'>

    Names can be requested instead of objects.  Note that the main local
    corpus is reported as 'local', not None:

    >>> for i, corpusName in enumerate(corpus.manager.iterateCorpora(returnObjects=False)):
    ...     print(corpusName)
    ...     if i == 2:
    ...        break
    core
    virtual
    local

    New in v.3
    '''
    # Only the literal True selects objects; any other value selects names.
    asObjects = returnObjects is True

    coreCorpus = corpora.CoreCorpus()
    yield coreCorpus if asObjects else coreCorpus.name

    virtualCorpus = corpora.VirtualCorpus()
    yield virtualCorpus if asObjects else virtualCorpus.name

    for localName in listLocalCorporaNames():
        if asObjects:
            yield corpora.LocalCorpus(localName)
        elif localName is None:
            # The unnamed default local corpus is reported as 'local'.
            yield 'local'
        else:
            yield localName
def getVirtualPaths(fileExtensions=None, expandExtensions=True):
    '''
    Return the paths of the virtual corpus that match a known extension.

    Passing None for `fileExtensions` returns all known extensions.
    Both arguments are forwarded unchanged to
    `corpora.VirtualCorpus().getPaths`.

    >>> len(corpus.getVirtualPaths()) > 6
    True
    '''
    virtualCorpus = corpora.VirtualCorpus()
    matches = virtualCorpus.getPaths(
        fileExtensions=fileExtensions,
        expandExtensions=expandExtensions,
    )
    return matches
def getVirtualWorkList(workName, movementNumber=None, fileExtensions=None):
    '''
    Look `workName` up among all virtual works and return a list of URLs
    for whatever matches.

    `movementNumber` and `fileExtensions` are forwarded unchanged to
    `corpora.VirtualCorpus().getWorkList`.

    >>> corpus.getVirtualWorkList('bach/bwv1007/prelude')
    ['http://kern.ccarh.org/cgi-bin/ksdata?l=cc/bach/cello&file=bwv1007-01.krn&f=xml']

    >>> corpus.getVirtualWorkList('junk')
    []
    '''
    virtualCorpus = corpora.VirtualCorpus()
    urls = virtualCorpus.getWorkList(
        workName,
        movementNumber=movementNumber,
        fileExtensions=fileExtensions,
    )
    return urls
def fromCacheName(name):
    '''
    Build the corpus object that corresponds to a `cacheName`.

    For the built-in corpora the cache names are the same as the names
    accepted by `fromName`:

    >>> corpus.manager.fromCacheName('core')
    <music21.corpus.corpora.CoreCorpus>

    >>> corpus.manager.fromCacheName('virtual')
    <music21.corpus.corpora.VirtualCorpus>

    >>> corpus.manager.fromCacheName('local')
    <music21.corpus.corpora.LocalCorpus: 'local'>

    >>> corpus.manager.fromCacheName(None)
    <music21.corpus.corpora.LocalCorpus: 'local'>

    Other local corpora are different and prefaced by "local-":

    >>> corpus.manager.fromCacheName('local-testDummy')
    <music21.corpus.corpora.LocalCorpus: 'testDummy'>

    Raises a corpus exception if it is not an allowable cache name.

    >>> corpus.manager.fromCacheName('testDummy')
    Traceback (most recent call last):
    music21.exceptions21.CorpusException: Cannot parse a cacheName of 'testDummy'
    '''
    if name == 'core':
        return corpora.CoreCorpus()
    if name == 'virtual':
        return corpora.VirtualCorpus()
    if name == 'local' or name is None:
        return corpora.LocalCorpus()

    namedLocalPrefix = 'local-'
    if name.startswith(namedLocalPrefix):
        # Whatever follows the prefix is the local corpus's own name.
        return corpora.LocalCorpus(name=name[len(namedLocalPrefix):])

    raise CorpusException("Cannot parse a cacheName of '{0}'".format(name))
def cacheMetadata(corpusNames=('local', 'core', 'virtual'),
                  useMultiprocessing=True,
                  verbose=False):
    '''
    Build the metadata caches for each corpus listed in `corpusNames`
    and store them as local cache files.

    Call as ``metadata.cacheMetadata()``

    `corpusNames` may be a single name or an iterable of names; the
    recognised names are 'core', 'local', and 'virtual'.  Any other
    name raises a MetadataCacheException when the loop reaches it, so
    corpora listed before it have already been processed.

    `useMultiprocessing` and `verbose` are forwarded to the metadata
    bundle's `addFromPaths`.  Progress messages go to
    `environLocal.warn` when `verbose` is exactly True and to
    `environLocal.printDebug` otherwise.  Nothing is returned.
    '''
    from music21 import corpus
    from music21.corpus import corpora
    # Not referenced below; retained from the original import list --
    # presumably needed for its import side effects (TODO confirm).
    from music21 import metadata

    def report(text):
        # Strict identity check: only the literal True makes output loud.
        if verbose is True:
            environLocal.warn(text)
        else:
            environLocal.printDebug(text)

    if not common.isIterable(corpusNames):
        corpusNames = (corpusNames,)

    stopwatch = common.Timer()
    stopwatch.start()

    # Every path that could not be parsed, accumulated across corpora.
    unparsedPaths = []

    # Paths for 'core' and 'local' come from corpus.getCorePaths() and
    # corpus.getLocalPaths(); 'virtual' uses corpus.getVirtualPaths().
    for requested in corpusNames:
        if requested == 'core':
            bundle = corpora.CoreCorpus().metadataBundle
            corpusPaths = corpus.getCorePaths()
            treatAsCorpus = True
        elif requested == 'local':
            bundle = corpora.LocalCorpus().metadataBundle
            corpusPaths = corpus.getLocalPaths()
            treatAsCorpus = False
        elif requested == 'virtual':
            bundle = corpora.VirtualCorpus().metadataBundle
            corpusPaths = corpus.getVirtualPaths()
            treatAsCorpus = False
        else:
            complaint = 'invalid corpus name provided: {0!r}'.format(requested)
            raise MetadataCacheException(complaint)

        report('metadata cache: starting processing of paths: {0}'.format(
            len(corpusPaths)))

        unparsedPaths.extend(bundle.addFromPaths(
            corpusPaths,
            useCorpus=treatAsCorpus,
            useMultiprocessing=useMultiprocessing,
            verbose=verbose
        ))

        report('cache: writing time: {0} md items: {1}'.format(
            stopwatch, len(bundle)))

        # Drop this function's reference before moving to the next corpus.
        del bundle

    report('cache: final writing time: {0} seconds'.format(stopwatch))

    for badPath in unparsedPaths:
        report('path failed to parse: {0}'.format(badPath))