def GScsv2RDF(infilename, outfilename, format="xml", withdescriptions=False):
    """Build a chord-event model from a tab-separated chord transcription.

    The first line of *infilename* must hold exactly two tab-separated
    fields: the artist name and the track title.  Every following line is
    handed to ``getTimestamp`` and ``getChordSymbol``.  Each time the chord
    symbol differs from the one on the previous line, a new ``Interval`` and
    ``ChordEvent`` are added to the model and the previous interval is
    closed at the new timestamp.

    Args:
        infilename: Path of the input file.
        outfilename: Not referenced by the code in this block.
        format: Not referenced by the code in this block.
        withdescriptions: When true, "CommonChords.rdf" is loaded and any
            chord URI that neither it nor the running cache describes is
            dereferenced and loaded into the cache.

    Raises:
        IndexError: If the input file is empty (there is no header line).
        ValueError: If the header does not split into exactly two fields.
        Exception: Anything raised while handling a data line is re-raised
            after an error message has been printed.

    NOTE(review): the model built here is neither written to *outfilename*
    nor returned, and the last interval is never given an end time within
    this block -- presumably a serialisation step is missing; confirm.
    """
    if withdescriptions:
        commonchords = ConjunctiveGraph()
        commonchords.load("CommonChords.rdf")
        extrachords = ConjunctiveGraph()

    # Read everything up front so the file handle is closed immediately.
    with open(infilename) as infile:
        lines = infile.readlines()

    #
    # Initial model bits
    #
    mi = mopy.MusicInfo()

    homepage = mopy.foaf.Document("http://sourceforge.net/projects/motools")
    mi.add(homepage)
    program = mopy.foaf.Agent()
    program.name = "GScsv2RDF.py"
    program.homepage = homepage
    mi.add(program)

    tl = TimeLine("#tl")
    tl.label = "Timeline derived from "+infilename
    tl.maker = program
    mi.add(tl)

    [artistStr, titleStr] = [f.strip() for f in lines[0].split("\t")]

    # Add artist & title metadata
    signal = Signal()
    signal.time = sig_int = Interval()
    sig_int.label = "Whole signal interval"
    sig_int.beginsAtDuration = secondsToXSDDuration(0)
    sig_int.onTimeLine = tl
    signal.published_as = track = Track()
    artist = MusicArtist()
    artist.made = track
    artist.name = artistStr
    track.title = titleStr
    mi.add(sig_int)
    mi.add(signal)
    mi.add(track)
    mi.add(artist)

    # lineNum counts data lines only: the header is not counted, so the
    # value is one less than the 1-based line number within the file.
    lineNum = 1
    segmentNum = 0
    thisSegment_i = None
    chordSymbol = ''
    t_secs = 0.0
    for line in lines[1:]:
        try:
            lastChordSymbol = chordSymbol
            t_secs = getTimestamp(line)
            chordSymbol = getChordSymbol(line)
            if chordSymbol != lastChordSymbol:
                segmentNum += 1

                # Open a new interval and close the previous one (if any)
                # at the same timestamp.
                lastSegment_i = thisSegment_i
                thisSegment_i = Interval("#i_"+str(segmentNum))
                thisSegment_i.beginsAtDuration = secondsToXSDDuration(t_secs)
                if lastSegment_i is not None:
                    lastSegment_i.endsAtDuration = secondsToXSDDuration(t_secs)
                    thisSegment_i.intervalAfter = lastSegment_i
                    lastSegment_i.intervalBefore = thisSegment_i
                mi.add(thisSegment_i)

                # '#' and ',' are rewritten so the symbol can sit in a URI.
                chordURI = ("http://purl.org/ontology/chord/symbol/"
                            + chordSymbol.replace("#", "s").replace(",", "%2C"))

                if withdescriptions:
                    chordRef = URIRef(chordURI)
                    described = (
                        any(True for _ in commonchords.predicate_objects(chordRef))
                        or any(True for _ in extrachords.predicate_objects(chordRef))
                    )
                    if not described:
                        # Deref to grab chord info
                        print("loading <"+chordURI+">...")
                        extrachords.load(chordURI)

                c = Chord(chordURI)
                c_event = ChordEvent("#ce_"+str(segmentNum))
                c_event.chord = c
                c_event.time = thisSegment_i
                c_event.label = chordSymbol

                mi.add(c)
                mi.add(c_event)
        except Exception:
            # Report where parsing failed, then let the original error
            # propagate with its traceback intact.
            print("ERROR : Problem parsing input file at line "+str(lineNum)+" !\n")
            raise
        lineNum += 1
def fpFile(self, filename):
    """Fingerprint *filename* with genpuid and return the MusicDNS metadata.

    Runs the external genpuid binary (module globals ``genpuidbin`` and
    ``MusicDNSKey``) on the file and parses the XML it prints.  The run is
    repeated up to five times while the output contains "unanalyzable".
    The reported title, artist and PUID are stored on a ``Track``,
    ``MusicArtist`` and ``Signal`` held in a ``MusicInfo``.

    Args:
        filename: Path of the audio file to fingerprint.

    Returns:
        The populated ``MusicInfo`` on success.  An empty ``MusicInfo``
        when genpuid reports no songs, when no PUID is available, or when
        the output cannot be parsed.
    """
    global genpuidbin, MusicDNSKey

    track = Track()
    audiofile = AudioFile(urlencode(os.path.basename(filename)))
    track.available_as = audiofile
    signal = Signal()
    signal.published_as = track
    mi = MusicInfo([track, audiofile, signal])

    filename = clean(filename)

    # TODO : If file isn't a WAV or MP3, use suitable decoder, and then pass
    # the resulting wav to genpuid.
    # NOTE(review): this is a shell command string; it presumably relies on
    # clean() making the name safe inside double quotes -- confirm.
    command = (genpuidbin + " " + MusicDNSKey
               + " -rmd=2 -xml -noanalysis \"" + filename + "\"")

    def run_genpuid():
        # Close the pipe explicitly instead of leaving it to the garbage
        # collector.
        pipe = os.popen(command)
        try:
            return pipe.readlines()
        finally:
            pipe.close()

    res_xml = run_genpuid()
    retry_count = 0
    while "unanalyzable" in "".join(res_xml) and retry_count < 5:
        # This can be caused by server hiccups
        warning("MusicDNS reports file is unanalyzable. Trying again...")
        retry_count += 1
        res_xml = run_genpuid()

    # parse results
    try:
        # oddly, we see "<genpuid songs="1">\n" twice when the file is
        # "unanalyzable"
        if len(res_xml) > 1 and res_xml[0] == res_xml[1]:
            res_xml = res_xml[1:]
        # strip out unknown prefix so minidom can parse
        clean_xml = "".join(res_xml).replace("mip:", "")
        dom = xml.dom.minidom.parseString(clean_xml)

        root = dom.getElementsByTagName("genpuid")[0]
        if not root.hasAttribute("songs") or int(root.getAttribute("songs")) == 0:
            return MusicInfo()

        trackelem = root.getElementsByTagName("track")[0]
        first_child = trackelem.childNodes[0]
        if first_child.nodeName == "#text" and first_child.data == "unavailable":
            info(" No PUID available for track : "+str(first_child.data))
            return MusicInfo()

        titles = trackelem.getElementsByTagName("title")
        if len(titles) > 0:
            track.title = titles[0].childNodes[0].wholeText

        artists = trackelem.getElementsByTagName("artist")
        if len(artists) > 0:
            artistobj = MusicArtist()
            artistobj.name = artists[0].getElementsByTagName("name")[0].childNodes[0].wholeText
            mi.add(artistobj)
            track.maker = artistobj

        puid_list = trackelem.getElementsByTagName("puid-list")
        puid_nodes = puid_list[0].getElementsByTagName("puid")
        puids = [puid_node.getAttribute("id") for puid_node in puid_nodes]
        if len(puids) == 0:
            info(" No PUID available for track : "+str(trackelem.childNodes[0].data))
            return MusicInfo()
        elif len(puids) > 1:
            warning("Multiple PUIDs returned for track !")
        # Only the first PUID is kept when several are returned.
        signal.puid = puids[0]

        # FIXME: the release date ("first-release-date") and the genres
        # ("genre-list" / "genre" / "name") that the track element may carry
        # are not extracted yet.

    except Exception as e:
        error("Failure while parsing results !")
        debug("xml :\n"+"".join(res_xml))
        error(str(e))
        return MusicInfo()

    # Hand back the populated model; every failure path above returns an
    # empty MusicInfo, so success must return one as well.
    return mi