def get_time(self, html):
    """Return the bold label of the ``ainfo`` block found in ``html``.

    The first ``<b style='color:#000'>...</b>`` that directly follows
    ``<div class="ainfo">`` is captured, decoded from cp1251 and passed
    through ``html_decode``. (Presumably this label is the clip duration --
    TODO confirm against the page markup.)

    Returns ``None`` when the markup is not present in ``html``.
    """
    pattern = "<div class=\"ainfo\"><b style='color:#000'>(?!</a>)(.*)</b>"
    flags = re.IGNORECASE | re.UNICODE
    try:
        # Indexing an empty match list raises IndexError, which is the
        # "not found" signal handled below. The decoding stays inside the
        # ``try`` so an IndexError raised there is treated the same way.
        raw_label = re.findall(pattern, html, flags)[0]
        return html_decode(unicode(raw_label, 'cp1251'))
    except IndexError:
        return None
def get_title(self, html): try: text = re.findall('<a href="video[^"]*noiphone">(?!</a>)(.*)</a>', html, re.IGNORECASE | re.UNICODE)[0] text = unicode(text, 'cp1251') text = text.replace('<span class="match">', '').replace('</span>', '') return html_decode(text) except IndexError: return None
def field(self, regexp_before, html):
    """Return the tag-free text that follows ``regexp_before`` in ``html``.

    ``regexp_before`` is a regular-expression prefix; the run of characters
    after it that contains neither ``<`` nor ``>`` (and is terminated by a
    ``<``) is captured, decoded from cp1251 and passed through
    ``html_decode``.

    Returns an empty string when nothing matches.
    """
    pattern = regexp_before + "([^<>]*)" + '<'
    flags = re.IGNORECASE | re.UNICODE
    try:
        # Only the first occurrence is used; no occurrence -> IndexError.
        first_hit = re.findall(pattern, html, flags)[0]
        return html_decode(unicode(first_hit, 'cp1251'))
    except IndexError:
        return ""
def get_time(self, html):
    """Extract the bold ``ainfo`` label from ``html``.

    Looks for ``<div class="ainfo">`` immediately followed by
    ``<b style='color:#000'>``, takes the text up to ``</b>`` from the first
    occurrence, decodes it from cp1251 and runs it through ``html_decode``.
    (Presumably the label holds a duration -- TODO confirm.)

    Returns ``None`` if the markup cannot be found.
    """
    label_re = "<div class=\"ainfo\"><b style='color:#000'>(?!</a>)(.*)</b>"
    search_flags = re.IGNORECASE | re.UNICODE
    try:
        # No match means an empty list, so [0] raises IndexError; decoding
        # is kept inside the ``try`` so its IndexErrors are handled alike.
        captured = re.findall(label_re, html, search_flags)[0]
        return html_decode(unicode(captured, 'cp1251'))
    except IndexError:
        return None
def find_tracks_by_url(self, url): logging.debug("Search By URL") url_parse = urlparse(url) if url_parse.fragment: params = dict([part.split('=') for part in url_parse.fragment.split('&')]) result = self.get(url, params) else: result = self.get(url) try: result = unicode(result, 'cp1251') except: result = result reg_all = "([^{<}]*)" result_url = re.findall(ur"http:([\\/.0-9_A-Z]*)", result, re.IGNORECASE) result_artist = re.findall(u"q]=" + reg_all + "'", result, re.IGNORECASE | re.UNICODE) result_title = re.findall('"title([0-9_]*)">' + reg_all + '<', result, re.IGNORECASE | re.UNICODE) result_time = re.findall('duration">' + reg_all, result, re.IGNORECASE | re.UNICODE) result_lyr = re.findall(ur"showLyrics" + reg_all, result, re.IGNORECASE | re.UNICODE) logging.info("lyr:::" + str(result_lyr)) songs = [] j = 0 for i, artist in enumerate(result_artist): path = "http:" + result_url[i + 3].replace("\\/", "/") title = html_decode(result_title[i][1]) if not title: if len(result_lyr) > j: title = result_lyr[j] title = title[title.find(";'>") + 3:] j += 1 artist = html_decode(artist) #song = VKSong(path, artist, title, result_time[i]); if "\">" in title: title = title[title.find("\">") + 2:] text = artist + " - " + title #print text song = FModel(text, path).add_artist(artist).add_title(title).add_time(result_time[i]) songs.append(song) logging.info(len(songs)) return songs
def get_json(self, html):
    """Extract and parse the JSON object embedded in ``html``.

    The first ``{...}`` span found on a single line of ``html`` (greedy, so
    it runs to the last ``}`` on that line) is passed through
    ``html_decode`` and parsed with ``simplejson``.

    Returns the parsed object, or ``None`` when ``html`` contains no such
    span, when the span is not valid JSON, or when the parsed object has no
    ``'host'`` key.
    """
    candidates = re.findall(r"(\{.*\})", html)
    if not candidates:
        # No braces at all: report "no video data" like the other failure
        # paths instead of letting an IndexError escape.
        return None

    json_code = html_decode(candidates[0])
    try:
        video = simplejson.loads(json_code)
    except Exception:
        # Best-effort parse of scraped markup: any parsing failure means
        # "not valid JSON", but SystemExit/KeyboardInterrupt still propagate.
        return None

    if 'host' not in video:
        return None
    return video