def get_data(self):
    """Fetch the iTunes "viewMovie" page for ``self.id`` and scrape it.

    The page is requested through ``read_url`` with ``ITUNES_HEADERS`` and
    the individual fields are pulled out of the returned markup with
    regular expressions (``find_re``) and the ``parse_cast`` /
    ``parse_movies`` helpers.

    Returns:
        dict: keys ``id``, ``actors``, ``averageRating``, ``directors``,
        ``format``, ``genre``, ``plotSummary``, ``posterUrl``,
        ``producers``, ``rated``, ``relatedMovies``, ``releaseDate``,
        ``runTime``, ``screenwriters``, ``soundtrackId`` and
        ``trailerUrl``.
    """
    url = compose_url("viewMovie", {"id": self.id})
    xml = read_url(url, None, ITUNES_HEADERS)

    data = {"id": self.id}
    data["actors"] = parse_cast(xml, "actors")

    # The rating is rendered as a row of star images; each full-star image
    # counts 1 and each "½" glyph counts 0.5.
    # NOTE(review): if the feed encodes the half star as a character entity
    # rather than the literal glyph, this count will miss it -- confirm.
    rating_markup = find_re(xml, "Average Rating:(.*?)</HBoxView>")
    data["averageRating"] = (
        rating_markup.count("rating_star_000033.png")
        + rating_markup.count("½") * 0.5
    )

    data["directors"] = parse_cast(xml, "directors")
    data["format"] = find_re(xml, "Format:(.*?)<")
    data["genre"] = decode_html(find_re(xml, "Genre:(.*?)<"))
    data["plotSummary"] = decode_html(
        find_re(
            xml,
            'PLOT SUMMARY</b>.*?<SetFontStyle normalStyle="textColor">(.*?)</SetFontStyle>',
        )
    )
    data["posterUrl"] = find_re(xml, 'reflection="." url="(.*?)"')
    data["producers"] = parse_cast(xml, "producers")
    data["rated"] = find_re(xml, "Rated(.*?)<")
    data["relatedMovies"] = parse_movies(xml, "related movies")
    data["releaseDate"] = find_re(xml, "Released(.*?)<")
    data["runTime"] = find_re(xml, "Run Time:(.*?)<")
    data["screenwriters"] = parse_cast(xml, "screenwriters")
    # Raw string: "\?" is not a valid escape in a normal string literal.
    data["soundtrackId"] = find_re(xml, r"viewAlbum\?id=(.*?)&")
    data["trailerUrl"] = find_re(xml, 'autoplay="." url="(.*?)"')
    return data
def get_lyrics(title, artist):
    """Look up the lyrics of a song through the lyricsfly.com API.

    The API landing page is fetched first and the text inside its
    ``<font color=green><b>`` element is used as the API key for the actual
    query.

    Args:
        title: song title, inserted as the ``t`` query parameter.
        artist: artist name, inserted as the ``a`` query parameter.

    Returns:
        The lyrics text taken from the ``<tx>`` element of the response,
        with ``[br]`` markers turned into newlines, runs of blank lines
        collapsed to a single blank line, and HTML entities decoded.
    """
    import re  # function-scope import keeps this block self-contained

    html = read_url('http://lyricsfly.com/api/')
    key = find_re(html, '<font color=green><b>(.*?)</b></font>')

    # NOTE(review): title/artist are interpolated verbatim; presumably the
    # caller or read_url takes care of URL-encoding -- confirm.
    url = 'http://lyricsfly.com/api/api.php?i=%s&a=%s&t=%s' % (key, artist, title)
    xml = read_url(url)

    # Everything between <tx> and the trailing lyricsfly credit line.
    lyrics = find_re(xml, r'<tx>(.*?)\[br\] Lyrics [a-z]* by lyricsfly\.com')

    # Physical line breaks in the payload carry no meaning; "[br]" does.
    lyrics = lyrics.replace('\n', '').replace('\r', '')
    lyrics = lyrics.replace('[br]', '\n').strip()

    # Allow at most one blank line in a row. str.replace returns a new
    # string, so the result has to be assigned; a regex also collapses
    # runs longer than three newlines in a single pass.
    lyrics = re.sub(r'\n{3,}', '\n\n', lyrics)

    # Undo one level of ampersand escaping so that doubly escaped entities
    # (e.g. "&amp;quot;") are fully resolved by decode_html.
    lyrics = decode_html(lyrics.replace('&amp;', '&'))
    return lyrics
def parse_xml_dict(xml):
    """Turn ``<key>name</key><type>value</type>`` pairs into a dict.

    The input is split on ``<key>``; every chunk that contains a closing
    ``</key>`` contributes one entry. The element that directly follows
    ``</key>`` decides how the value is converted:

    * ``<true/>``  -> ``True``
    * ``<false/>`` -> ``False``
    * ``<integer>`` -> ``int``
    * ``<string>`` -> text passed through ``decode_html``
    * anything else -> the raw text matched between the opening and
      closing tag

    Args:
        xml: markup containing the key/value pairs.

    Returns:
        dict mapping each key name to its converted value. When a key
        occurs more than once, the last occurrence wins.

    Raises:
        ValueError: if an ``<integer>`` element does not contain a valid
            integer literal.
    """
    values = {}
    for chunk in xml.split("<key>"):
        # The text before the first <key> (and any malformed chunk) has no
        # closing tag and therefore carries no entry.
        if "</key>" not in chunk:
            continue

        key = find_re(chunk, "(.*?)</key>")
        tag = find_re(chunk, "</key><(.*?)>")

        if tag == "true/":
            value = True
        elif tag == "false/":
            # Self-closing like <true/>; there is no closing tag to search
            # for, so it must not fall through to the generic branch.
            value = False
        else:
            value = find_re(chunk, "<%s>(.*?)</%s>" % (tag, tag))
            if tag == "integer":
                value = int(value)
            elif tag == "string":
                value = decode_html(value)
        values[key] = value
    return values