Пример #1
0
def GScsv2RDF(infilename, outfilename, format="xml", withdescriptions=False):
	"""Build a MusicInfo model of chord events from a chord annotation file.

	The first line of *infilename* must hold ``artist<TAB>title``.  Every
	following line is handed to ``getTimestamp`` and ``getChordSymbol``; each
	time the chord symbol differs from the one on the previous line, a new
	Interval and a ChordEvent pointing at it are added to the model, and the
	previous Interval (if any) is closed at the new timestamp.

	Args:
		infilename: path of the annotation file to read.
		outfilename: not used in the portion of the function shown here.
		format: not used in the portion of the function shown here.
		withdescriptions: when true, "CommonChords.rdf" is loaded up front and
			any chord URI that has no statements there (nor among the chords
			fetched so far) is loaded from its own URI.

	Raises:
		IndexError: if the input file is empty.
		ValueError: if the first line does not split into exactly two
			tab-separated fields.
		Exception: anything raised while handling a data line is re-raised
			after an error message carrying the line counter is printed.
	"""
	if withdescriptions:
		commonchords = ConjunctiveGraph()
		commonchords.load("CommonChords.rdf")
		extrachords = ConjunctiveGraph()

	# Read everything up front and make sure the handle is closed again.
	with open(infilename) as infile:
		lines = infile.readlines()

	#
	# Initial model bits
	#
	mi = mopy.MusicInfo()

	homepage = mopy.foaf.Document("http://sourceforge.net/projects/motools")
	mi.add(homepage)
	program = mopy.foaf.Agent()
	program.name = "GScsv2RDF.py"
	program.homepage = homepage
	mi.add(program)

	tl = TimeLine("#tl")
	tl.label = "Timeline derived from "+infilename
	tl.maker = program
	mi.add(tl)

	[artistStr, titleStr] = [f.strip() for f in lines[0].split("\t")]
	# Add artist & title metadata
	signal = Signal()
	sig_int = Interval()
	signal.time = sig_int
	sig_int.label = "Whole signal interval"
	sig_int.beginsAtDuration = secondsToXSDDuration(0)
	sig_int.onTimeLine = tl
	track = Track()
	signal.published_as = track
	artist = MusicArtist()
	artist.made = track
	artist.name = artistStr
	track.title = titleStr
	mi.add(sig_int)
	mi.add(signal)
	mi.add(track)
	mi.add(artist)

	# lineNum counts data lines only (the header is excluded), so it is one
	# less than the 1-based line number inside the file.
	lineNum = 1
	segmentNum = 0
	thisSegment_i = None
	chordSymbol = ''

	t_secs = 0.0

	for line in lines[1:]:
		try:
			lastChordSymbol = chordSymbol
			t_secs = getTimestamp(line)
			chordSymbol = getChordSymbol(line)
			if chordSymbol != lastChordSymbol:
				# A new chord starts here: open a fresh interval.
				segmentNum += 1

				lastSegment_i = thisSegment_i
				thisSegment_i = Interval("#i_"+str(segmentNum))
				thisSegment_i.beginsAtDuration = secondsToXSDDuration(t_secs)
				if lastSegment_i is not None:
					# Close the previous interval at the same timestamp and
					# link the two intervals to each other.
					lastSegment_i.endsAtDuration = secondsToXSDDuration(t_secs)
					thisSegment_i.intervalAfter = lastSegment_i
					lastSegment_i.intervalBefore = thisSegment_i
				mi.add(thisSegment_i)

				# '#' and ',' are rewritten so the symbol fits into the URI.
				chordURI = "http://purl.org/ontology/chord/symbol/"+chordSymbol.replace("#","s").replace(",","%2C")

				if withdescriptions and \
				   len(list(commonchords.predicate_objects(URIRef(chordURI)))) == 0 and \
				   len(list(extrachords.predicate_objects(URIRef(chordURI)))) == 0:
					# Deref to grab chord info
					print("loading <"+chordURI+">...")
					extrachords.load(chordURI)

				c = Chord(chordURI)
				c_event = ChordEvent("#ce_"+str(segmentNum))
				c_event.chord = c
				c_event.time = thisSegment_i
				c_event.label = chordSymbol

				mi.add(c)
				mi.add(c_event)

		except Exception:
			# Report where parsing stopped, then let the caller see the error.
			print("ERROR : Problem parsing input file at line "+str(lineNum)+" !\n")
			raise
		lineNum += 1
Пример #2
0
	def fpFile(self, filename):
		"""Look up the MusicDNS PUID of *filename* by fingerprinting it.

		Runs the external ``genpuidbin`` program (with ``MusicDNSKey``) on the
		file, parses the XML it prints and copies title, artist name and the
		first PUID onto freshly created Track / MusicArtist / Signal objects
		collected in a MusicInfo.

		Args:
			filename: path of the audio file to fingerprint.

		Returns:
			An empty ``MusicInfo()`` when the reply reports no songs, the track
			is "unavailable", no PUID is listed, or the reply cannot be parsed.
			NOTE(review): the populated ``mi`` is not returned anywhere in the
			portion of the method shown here -- confirm against the full source.
		"""
		global genpuidbin, MusicDNSKey

		track = Track()
		audiofile = AudioFile(urlencode(os.path.basename(filename)))
		track.available_as = audiofile
		signal = Signal()
		signal.published_as = track
		mi = MusicInfo([track, audiofile, signal])

		filename = clean(filename)
		# TODO : If file isn't a WAV or MP3, use suitable decoder, and then pass the resulting wav to genpuid.
		# NOTE(review): the filename is interpolated into a shell command line;
		# this presumably relies on clean() having made it shell-safe -- confirm.
		command = genpuidbin + " " + MusicDNSKey + " -rmd=2 -xml -noanalysis \""+filename+"\""

		def run_genpuid():
			# Read the whole reply and always release the pipe afterwards.
			pipe = os.popen(command)
			try:
				return pipe.readlines()
			finally:
				pipe.close()

		res_xml = run_genpuid()

		retry_count = 0
		while ("unanalyzable" in "".join(res_xml)) and (retry_count < 5):
			warning("MusicDNS reports file is unanalyzable. Trying again...") # This can be caused by server hiccups
			retry_count += 1
			res_xml = run_genpuid()

		# parse results
		try:
			# Only compare the first two lines when there are two of them, so
			# that a reply delivered on a single line is still parsed.
			if len(res_xml) > 1 and res_xml[0] == res_xml[1]:
				res_xml = res_xml[1:] # oddly, we see "<genpuid songs="1">\n" twice when the file is "unanalyzable"

			clean_xml = "".join(res_xml).replace("mip:","") # strip out unknown prefix so minidom can parse
			dom = xml.dom.minidom.parseString(clean_xml)

			root = dom.getElementsByTagName("genpuid")[0]

			if (not root.hasAttribute("songs")) or (int(root.getAttribute("songs")) == 0):
				return MusicInfo()

			trackelem = root.getElementsByTagName("track")[0]

			if (trackelem.childNodes[0].nodeName=="#text") and (trackelem.childNodes[0].data == "unavailable"):
				info(" No PUID available for track : "+str(trackelem.childNodes[0].data))
				return MusicInfo()

			titles = trackelem.getElementsByTagName("title")
			if len(titles) > 0:
				track.title = titles[0].childNodes[0].wholeText

			artists = trackelem.getElementsByTagName("artist")
			if len(artists) > 0:
				artistobj = MusicArtist()
				artistobj.name = artists[0].getElementsByTagName("name")[0].childNodes[0].wholeText
				mi.add(artistobj)
				track.maker = artistobj

			puid_list = trackelem.getElementsByTagName("puid-list")
			puid_nodes = puid_list[0].getElementsByTagName("puid")
			puids = [puid_node.getAttribute("id") for puid_node in puid_nodes]

			if len(puids) == 0:
				info(" No PUID available for track : "+str(trackelem.childNodes[0].data))
				return MusicInfo()
			elif len(puids) > 1:
				warning("Multiple PUIDs returned for track !")

			# Only the first PUID is kept, even when several are returned.
			signal.puid = puids[0]

			# FIXME : "first-release-date" and the "genre" names inside
			# "genre-list" elements are not extracted from the reply yet.

		except Exception as e:
			# Any failure while reading the reply yields an empty result.
			error("Failure while parsing results !")
			debug("xml :\n"+"".join(res_xml))
			error(str(e))
			return MusicInfo()