Пример #1
0
 def get_time(self, html):
     """Extract the text of the first ``ainfo`` bold element from *html*.

     The captured text is decoded from cp1251 and passed through
     ``html_decode``. Returns ``None`` when the markup is not present
     (an ``IndexError`` raised while decoding is treated the same way).
     """
     pattern = "<div class=\"ainfo\"><b style='color:#000'>(?!</a>)(.*)</b>"
     flags = re.IGNORECASE | re.UNICODE
     try:
         hit = re.search(pattern, html, flags)
         if hit is None:
             return None
         decoded = unicode(hit.group(1), 'cp1251')
         return html_decode(decoded)
     except IndexError:
         return None
Пример #2
0
 def get_title(self, html):
     """Extract the text of the first ``video...noiphone`` link in *html*.

     The captured text is decoded from cp1251, stripped of the
     ``<span class="match">`` highlight wrapper and passed through
     ``html_decode``. Returns ``None`` when no such link is present
     (an ``IndexError`` raised while decoding is treated the same way).
     """
     pattern = '<a href="video[^"]*noiphone">(?!</a>)(.*)</a>'
     flags = re.IGNORECASE | re.UNICODE
     try:
         hit = re.search(pattern, html, flags)
         if hit is None:
             return None
         title = unicode(hit.group(1), 'cp1251')
         # Drop the search-highlight markup, keeping the text inside it.
         for markup in ('<span class="match">', '</span>'):
             title = title.replace(markup, '')
         return html_decode(title)
     except IndexError:
         return None
Пример #3
0
 def field(self, regexp_before, html):
     """Return the tag-free text that follows *regexp_before* in *html*.

     The pattern is *regexp_before* followed by a run of characters that
     are neither ``<`` nor ``>``, terminated by ``<``. The first match is
     decoded from cp1251 and passed through ``html_decode``. Returns an
     empty string when nothing matches.
     """
     pattern = regexp_before + "([^<>]*)" + '<'
     flags = re.IGNORECASE | re.UNICODE
     try:
         captured = re.findall(pattern, html, flags)[0]
         return html_decode(unicode(captured, 'cp1251'))
     except IndexError:
         return ""
Пример #4
0
 def get_time(self, html):
     """Extract the text of the first ``ainfo`` bold element from *html*.

     The captured text is decoded from cp1251 and passed through
     ``html_decode``. Returns ``None`` when the markup is not present
     (an ``IndexError`` raised while decoding is treated the same way).
     """
     pattern = "<div class=\"ainfo\"><b style='color:#000'>(?!</a>)(.*)</b>"
     flags = re.IGNORECASE | re.UNICODE
     try:
         hit = re.search(pattern, html, flags)
         if hit is None:
             return None
         decoded = unicode(hit.group(1), 'cp1251')
         return html_decode(decoded)
     except IndexError:
         return None
Пример #5
0
 def field(self, regexp_before, html):
     """Return the tag-free text that follows *regexp_before* in *html*.

     The pattern is *regexp_before* followed by a run of characters that
     are neither ``<`` nor ``>``, terminated by ``<``. The first match is
     decoded from cp1251 and passed through ``html_decode``. Returns an
     empty string when nothing matches.
     """
     pattern = regexp_before + "([^<>]*)" + '<'
     flags = re.IGNORECASE | re.UNICODE
     try:
         captured = re.findall(pattern, html, flags)[0]
         return html_decode(unicode(captured, 'cp1251'))
     except IndexError:
         return ""
Пример #6
0
 def find_tracks_by_url(self, url):
     logging.debug("Search By URL")
     url_parse = urlparse(url)
     
     if url_parse.fragment:
         params = dict([part.split('=') for part in url_parse.fragment.split('&')])
         result = self.get(url, params)
     else:
         result = self.get(url)
         
     try:
         result = unicode(result, 'cp1251')            
     except:
         result = result
     
     reg_all = "([^{<}]*)"
     result_url = re.findall(ur"http:([\\/.0-9_A-Z]*)", result, re.IGNORECASE)
     result_artist = re.findall(u"q]=" + reg_all + "'", result, re.IGNORECASE | re.UNICODE)
     result_title = re.findall('"title([0-9_]*)">' + reg_all + '<', result, re.IGNORECASE | re.UNICODE)
      
     result_time = re.findall('duration">' + reg_all, result, re.IGNORECASE | re.UNICODE)
     result_lyr = re.findall(ur"showLyrics" + reg_all, result, re.IGNORECASE | re.UNICODE)
     logging.info("lyr:::" + str(result_lyr))
     songs = []
     j = 0
     for i, artist in enumerate(result_artist):
         path = "http:" + result_url[i + 3].replace("\\/", "/")
         title = html_decode(result_title[i][1])
         if not title:
             if len(result_lyr) > j:
                 title = result_lyr[j]
                 title = title[title.find(";'>") + 3:]
                 j += 1
         artist = html_decode(artist)
         #song = VKSong(path, artist, title, result_time[i]);
         if "\">" in title:
             title = title[title.find("\">") + 2:]
         text = artist + " - " + title
         #print text
         song = FModel(text, path).add_artist(artist).add_title(title).add_time(result_time[i])
         songs.append(song)
     logging.info(len(songs))
     return songs 
Пример #7
0
 def get_json(self, html):
     """Extract and parse the JSON object embedded in *html*.

     The first span running from a ``{`` to the last ``}`` on the same
     line is passed through ``html_decode`` and parsed with
     ``simplejson``.

     Returns the parsed object, or ``None`` when *html* contains no such
     span, when the span is not valid JSON, or when the parsed object
     has no ``'host'`` key.
     """
     match = re.search(r"(\{.*\})", html)
     if match is None:
         # No brace-delimited span at all: report "no video" instead of
         # letting an IndexError escape.
         return None
     json_code = html_decode(match.group(1))
     try:
         video = simplejson.loads(json_code)
     except (ValueError, TypeError):
         # simplejson's decode error is a ValueError subclass.
         return None
     if 'host' not in video:
         return None
     return video
Пример #8
0
 def get_title(self, html):
     """Extract the text of the first ``video...noiphone`` link in *html*.

     The captured text is decoded from cp1251, stripped of the
     ``<span class="match">`` highlight wrapper and passed through
     ``html_decode``. Returns ``None`` when no such link is present
     (an ``IndexError`` raised while decoding is treated the same way).
     """
     pattern = '<a href="video[^"]*noiphone">(?!</a>)(.*)</a>'
     flags = re.IGNORECASE | re.UNICODE
     try:
         hit = re.search(pattern, html, flags)
         if hit is None:
             return None
         title = unicode(hit.group(1), 'cp1251')
         # Drop the search-highlight markup, keeping the text inside it.
         for markup in ('<span class="match">', '</span>'):
             title = title.replace(markup, '')
         return html_decode(title)
     except IndexError:
         return None
Пример #9
0
 def get_json(self, html):
     """Extract and parse the JSON object embedded in *html*.

     The first span running from a ``{`` to the last ``}`` on the same
     line is passed through ``html_decode`` and parsed with
     ``simplejson``.

     Returns the parsed object, or ``None`` when *html* contains no such
     span, when the span is not valid JSON, or when the parsed object
     has no ``'host'`` key.
     """
     match = re.search(r"(\{.*\})", html)
     if match is None:
         # No brace-delimited span at all: report "no video" instead of
         # letting an IndexError escape.
         return None
     json_code = html_decode(match.group(1))
     try:
         video = simplejson.loads(json_code)
     except (ValueError, TypeError):
         # simplejson's decode error is a ValueError subclass.
         return None
     if 'host' not in video:
         return None
     return video