def search(song, n, processes=config.search_processes, returnGen=False):
    '''
    Function searches song and returns n valid .mp3 links.

    The requested amount is split between the enabled sources: every source
    is asked for n // len(sources) links and the remainder is handed out one
    link at a time to the first sources, so the quotas add up to exactly n.

    @param song: Search string.
    @param n: Number of songs.
    @param processes: Number of processes to launch in the subprocessing pool.
    @param returnGen: If true, a generator of the links will be returned,
                      and not the calculated list itself.
    @return: The pool's result iterator when returnGen is true, otherwise a
             list built from it.
    '''
    sources_list = [x for x in config.search_sources_const if config.search_sources[x]]
    log.debug("Using sources: %s" % sources_list)

    if not sources_list:
        # No source is enabled: return early instead of dividing by zero below.
        return iter(()) if returnGen else []

    # divmod keeps the quota an integer on both Python 2 and Python 3.
    quota, extra = divmod(n, len(sources_list))

    pool = ThreadPool(max_threads=min(processes, len(sources_list)),
                      catch_returns=True, logger=log)
    for index, source in enumerate(sources_list):
        # The first `extra` sources absorb the remainder, one link each.
        pool(parse)(song, source, quota + (1 if index < extra else 0))

    gen = pool.iter()
    if returnGen:
        return gen
    return list(gen)
def search(song, n, processes=config.search_processes, returnGen=False):
    '''
    Function searches song and returns n valid .mp3 links.

    The requested amount is split between the enabled sources: every source
    is asked for n // len(sources) links and the remainder is handed out one
    link at a time to the first sources, so the quotas add up to exactly n.

    @param song: Search string.
    @param n: Number of songs.
    @param processes: Number of processes to launch in the subprocessing pool.
    @param returnGen: If true, a generator of the links will be returned,
                      and not the calculated list itself.
    @return: The pool's result iterator when returnGen is true, otherwise a
             list built from it.
    '''
    sources_list = [
        x for x in config.search_sources_const if config.search_sources[x]
    ]
    log.debug("Using sources: %s" % sources_list)

    if not sources_list:
        # No source is enabled: return early instead of dividing by zero below.
        return iter(()) if returnGen else []

    # divmod keeps the quota an integer on both Python 2 and Python 3.
    quota, extra = divmod(n, len(sources_list))

    pool = ThreadPool(
        max_threads=min(processes, len(sources_list)),
        catch_returns=True,
        logger=log)
    for index, source in enumerate(sources_list):
        # The first `extra` sources absorb the remainder, one link each.
        pool(parse)(song, source, quota + (1 if index < extra else 0))

    gen = pool.iter()
    if returnGen:
        return gen
    return list(gen)
def parse_soundcloud_api2(title):
    '''
    Function connects to soundcloud.com and returns the .mp3 links in it.
    API method 2: Parsing player's json data.

    @param title: Search string passed to search_soundcloud.
    @return: The result iterator of the thread pool that ran
             get_soundcloud_dl_link on every search hit.
    '''
    track_pages = search_soundcloud(title)

    # One get_soundcloud_dl_link job per hit, at most five at a time.
    workers = ThreadPool(max_threads=5, catch_returns=True, logger=log)
    for track_page in track_pages:
        workers(get_soundcloud_dl_link)(track_page)

    return workers.iter()
def parse_bandcamp(title):
    '''
    Function searches bandcamp for title and schedules the download-link
    lookups for the first album/track hits.

    Hits whose url contains neither '/album/' nor '/track/' are skipped and do
    not count towards the limit of three scheduled lookups.

    @param title: Search string passed to search_bandcamp.
    @return: The result iterator of the thread pool that ran the lookups.
    '''
    max_result_parsing = 3

    hits = search_bandcamp(title)
    workers = ThreadPool(max_threads=5, catch_returns=True, logger=log)

    scheduled = 0
    for hit in hits:
        is_album = '/album/' in hit
        if is_album or '/track/' in hit:
            # Albums take precedence when a url contains both markers.
            if is_album:
                workers(get_bandcamp_album_dl_links)(hit)
            else:
                workers(get_bandcamp_dl_link)(hit)
            scheduled += 1
        if scheduled >= max_result_parsing:
            break

    return workers.iter()
def run(self):
    '''
    Called by Qt once the thread environment has been set up.

    Collects fetched photos for self.song into fn_list until
    self.numOfPhotos of them were gathered or the image search results
    run out.
    '''
    # Slice-copy, so consuming batches below never touches the object the
    # search call returned.
    google_ans = Main.WebParser.WebServices.googleImageSearch(self.song)[:]
    pool = ThreadPool(max_threads=config.GoogleImagesGrabber_processes,
                      catch_returns=True, logger=log)

    fn_list = []
    while len(fn_list) < self.numOfPhotos and google_ans:
        # Only request as many photos as are still missing; failed fetches
        # are made up for on the next pass of the loop.
        missing = self.numOfPhotos - len(fn_list)
        urls = google_ans[:missing]
        del google_ans[:missing]

        for url in urls:
            pool(self.fetchPhoto)(url)

        for photo in pool.iter():
            try:
                # Falsy results are dropped.
                if photo:
                    fn_list.append(photo)
            except Exception as e:
                log.warning("Exception %s ignored in GoogleImagesGrabberThread." % str(e))
def run(self):
    '''
    Called by Qt once the thread environment has been set up.

    When self.song is a url (its scheme is listed in
    config.allowd_web_protocols) it is treated as a direct link:
    self.isDirectLink is set, self.url takes the url, self.song is cleared
    and links_gen is built for it:

    - Youtube playlist urls: one get_youtube_dl_link job per video id.
    - Other Youtube urls (except 'videoplayback' ones): the single video.
    - Anything else: whatever get_ydl_extract_info reports, or the url
      itself as a "Direct Link" when it reports nothing.
    '''
    if urlparse(self.song).scheme in config.allowd_web_protocols:
        # A url and not a search string.
        self.isDirectLink = True
        self.url = self.song
        self.song = ""

        domainName = urlparse(self.url).netloc.lower()
        is_youtube = domainName.endswith(('youtube.com', 'youtu.be'))
        if is_youtube and 'videoplayback' not in self.url:
            queries = parse_qs(urlparse(self.url).query)
            if 'p' in queries or 'list' in queries:
                log.debug("Url is a direct url (Youtube playlist)")
                # 'p' wins over 'list' when both are present.
                if 'p' in queries:
                    playlist_id = queries['p'][0]
                else:
                    playlist_id = queries['list'][0]

                videos_ids = Main.WebParser.LinksGrabber.parse_Youtube_playlist(playlist_id)
                t_pool = ThreadPool(max_threads=config.buildSongObjs_processes,
                                    catch_returns=True, logger=log)
                for v_id in videos_ids:
                    t_pool(Main.WebParser.LinksGrabber.get_youtube_dl_link)(v_id)
                links_gen = t_pool.iter()
            else:
                log.debug("Url is a direct url (Youtube)")
                if domainName.endswith('youtube.com'):
                    video_id = queries['v'][0]
                else:
                    # youtu.be urls: the path is used as the video id.
                    video_id = urlparse(self.url).path.strip('/')

                try:
                    metaUrlObj = Main.WebParser.LinksGrabber.get_youtube_dl_link(video_id)
                except YoutubeException as e:
                    self.error.emit(e)
                    self.dont_emit_NoResultsException_error = True
                    links_gen = (x for x in [])
                else:
                    links_gen = (x for x in [metaUrlObj]) if metaUrlObj else (x for x in [])
        else:
            # There are no dedicated Bandcamp/SoundCloud branches here: every
            # non-Youtube url is handed to get_ydl_extract_info.
            ydlResult = Main.WebParser.LinksGrabber.get_ydl_extract_info(self.url)
            if ydlResult:
                if 'entries' not in ydlResult or 'url' in ydlResult:
                    # Treat the result itself as its only entry.
                    ydlResult['entries'] = [ydlResult]

                metaUrlObjs = []
                for entry in ydlResult['entries']:
                    # Look format_id up only when height is absent, so a
                    # missing format_id cannot fail an entry that has a height.
                    if 'height' in entry:
                        quality = entry['height']
                    else:
                        quality = entry['format_id']

                    metaUrlObjs.append(utils.cls.MetaUrl(
                        entry['url'],
                        entry['extractor'],
                        itag=utils.cls.ItagData(0, entry['ext'], quality),
                        title=entry.get('title'),
                        videoid=entry.get('id'),
                        webpage_url=entry.get('webpage_url'),
                        view_count=entry.get('view_count', 0),
                        description=entry.get('description', ''),
                        thumbnail=entry.get('thumbnail'),
                    ))
                links_gen = (x for x in metaUrlObjs)
            else:
                file_ext = os.path.splitext(self.url.split('/')[-1])[1].strip('.')
                log.debug("Url is a direct url (%s file)." % file_ext)
                metaUrlObj = utils.cls.MetaUrl(self.url, "Direct Link")
                links_gen = (x for x in [metaUrlObj])
def run(self):
    '''
    Called by Qt once the thread environment has been set up.

    When self.song is a url (its scheme is listed in
    config.allowd_web_protocols) it is treated as a direct link:
    self.isDirectLink is set, self.url takes the url, self.song is cleared
    and links_gen is built for it:

    - Youtube playlist urls: one get_youtube_dl_link job per video id.
    - Other Youtube urls (except 'videoplayback' ones): the single video.
    - Anything else: whatever get_ydl_extract_info reports, or the url
      itself as a "Direct Link" when it reports nothing.
    '''
    if urlparse(self.song).scheme in config.allowd_web_protocols:
        # A url and not a search string.
        self.isDirectLink = True
        self.url = self.song
        self.song = ""

        domainName = urlparse(self.url).netloc.lower()
        is_youtube = domainName.endswith(('youtube.com', 'youtu.be'))
        if is_youtube and 'videoplayback' not in self.url:
            queries = parse_qs(urlparse(self.url).query)
            if 'p' in queries or 'list' in queries:
                log.debug("Url is a direct url (Youtube playlist)")
                # 'p' wins over 'list' when both are present.
                if 'p' in queries:
                    playlist_id = queries['p'][0]
                else:
                    playlist_id = queries['list'][0]

                videos_ids = Main.WebParser.LinksGrabber.parse_Youtube_playlist(
                    playlist_id)
                t_pool = ThreadPool(
                    max_threads=config.buildSongObjs_processes,
                    catch_returns=True,
                    logger=log)
                for v_id in videos_ids:
                    t_pool(Main.WebParser.LinksGrabber.get_youtube_dl_link)(
                        v_id)
                links_gen = t_pool.iter()
            else:
                log.debug("Url is a direct url (Youtube)")
                if domainName.endswith('youtube.com'):
                    video_id = queries['v'][0]
                else:
                    # youtu.be urls: the path is used as the video id.
                    video_id = urlparse(self.url).path.strip('/')

                try:
                    metaUrlObj = Main.WebParser.LinksGrabber.get_youtube_dl_link(
                        video_id)
                except YoutubeException as e:
                    self.error.emit(e)
                    self.dont_emit_NoResultsException_error = True
                    links_gen = (x for x in [])
                else:
                    if metaUrlObj:
                        links_gen = (x for x in [metaUrlObj])
                    else:
                        links_gen = (x for x in [])
        else:
            # There are no dedicated Bandcamp/SoundCloud branches here: every
            # non-Youtube url is handed to get_ydl_extract_info.
            ydlResult = Main.WebParser.LinksGrabber.get_ydl_extract_info(
                self.url)
            if ydlResult:
                if 'entries' not in ydlResult or 'url' in ydlResult:
                    # Treat the result itself as its only entry.
                    ydlResult['entries'] = [ydlResult]

                metaUrlObjs = []
                for entry in ydlResult['entries']:
                    # Look format_id up only when height is absent, so a
                    # missing format_id cannot fail an entry that has a height.
                    if 'height' in entry:
                        quality = entry['height']
                    else:
                        quality = entry['format_id']

                    metaUrlObjs.append(
                        utils.cls.MetaUrl(
                            entry['url'],
                            entry['extractor'],
                            itag=utils.cls.ItagData(0, entry['ext'], quality),
                            title=entry.get('title'),
                            videoid=entry.get('id'),
                            webpage_url=entry.get('webpage_url'),
                            view_count=entry.get('view_count', 0),
                            description=entry.get('description', ''),
                            thumbnail=entry.get('thumbnail')))
                links_gen = (x for x in metaUrlObjs)
            else:
                file_ext = os.path.splitext(
                    self.url.split('/')[-1])[1].strip('.')
                log.debug("Url is a direct url (%s file)." % file_ext)
                metaUrlObj = utils.cls.MetaUrl(self.url, "Direct Link")
                links_gen = (x for x in [metaUrlObj])
def run(self):
    '''
    Called by Qt once the thread environment has been set up.

    When self.song is a url (its scheme is listed in
    config.allowd_web_protocols) it is treated as a direct link:
    self.isDirectLink is set, self.url takes the url, self.song is cleared
    and links_gen is built for it according to the url's domain (Youtube,
    bandcamp, soundcloud, or a plain file url).

    For a plain url whose extension comes out empty, a
    NotSupportedFiletypeException is emitted through self.error and the
    method returns early.
    '''
    if urlparse(self.song).scheme in config.allowd_web_protocols:
        # A url and not a search string.
        self.isDirectLink = True
        self.url = self.song
        self.song = ""

        domainName = urlparse(self.url).netloc.lower()
        if domainName.endswith(('youtube.com', 'youtu.be')):
            queries = parse_qs(urlparse(self.url).query)
            if 'p' in queries or 'list' in queries:
                log.debug("Url is a direct url (Youtube playlist)")
                # 'p' wins over 'list' when both are present.
                if 'p' in queries:
                    playlist_id = queries['p'][0]
                else:
                    playlist_id = queries['list'][0]

                videos_ids = Main.WebParser.LinksGrabber.parse_Youtube_playlist(playlist_id)
                t_pool = ThreadPool(max_threads=config.buildSongObjs_processes,
                                    catch_returns=True, logger=log)
                for v_id in videos_ids:
                    t_pool(Main.WebParser.LinksGrabber.get_youtube_dl_link)(v_id)
                links_gen = t_pool.iter()
            else:
                log.debug("Url is a direct url (Youtube)")
                if domainName.endswith('youtube.com'):
                    video_id = queries['v'][0]
                else:
                    # youtu.be urls: the path is used as the video id.
                    video_id = urlparse(self.url).path.strip('/')

                try:
                    metaUrlObj = Main.WebParser.LinksGrabber.get_youtube_dl_link(video_id)
                except YoutubeException as e:
                    self.error.emit(e)
                    # metaUrlObj was never bound here; yield nothing instead
                    # of failing with UnboundLocalError right after the emit.
                    links_gen = (x for x in [])
                else:
                    links_gen = (x for x in [metaUrlObj])

        elif domainName.endswith('bandcamp.com'):
            if '/album/' in self.url:
                log.debug("Url is a direct url (bandcamp album)")
                metaUrlObjs = Main.WebParser.LinksGrabber.get_bandcamp_album_dl_links(self.url)
                links_gen = (x for x in metaUrlObjs)
            elif '/track/' in self.url:
                log.debug("Url is a direct url (bandcamp)")
                metaUrlObj = Main.WebParser.LinksGrabber.get_bandcamp_dl_link(self.url)
                links_gen = (x for x in [metaUrlObj])
            else:
                links_gen = (x for x in [])

        elif domainName.endswith('soundcloud.com'):
            log.debug("Url is a direct url (Soundcloud)")
            if self.url.startswith('https://'):
                self.url = self.url.replace('https://', 'http://')
            metaUrlObj = Main.WebParser.LinksGrabber.get_soundcloud_dl_link(self.url)
            links_gen = (x for x in [metaUrlObj])

        else:
            # NOTE(review): for a last path segment without a dot this yields
            # the whole segment, so ext is only empty when the url ends with
            # '/' - confirm that is the intended "no extension" test.
            ext = self.url.split('/')[-1].split('.')[-1]
            if re.match(r"^http://.*soundcloud\.com/.+/.+/download$", self.url):
                log.debug("Url is a direct url (Soundcloud).")
                metaUrlObj = utils.classes.MetaUrl(self.url, "Direct Link")
            elif ext in ['mp3', 'mp4', 'flv', 'webm']:
                log.debug("Url is a direct url (%s file)." % ext)
                metaUrlObj = utils.classes.MetaUrl(self.url, "Direct Link")
            elif ext:
                log.debug("Url is a direct url (%s - Non-multimedia file)." % ext)
                metaUrlObj = utils.classes.MetaUrl(self.url, "Direct Non-Multimedia Link")
            else:
                log.debug("Url is a direct url, no extention provided.")
                log.error("got NotSupportedFiletypeException() for the \"%s\" extention." % ext)
                self.error.emit(NotSupportedFiletypeException(ext))
                return
            links_gen = (x for x in [metaUrlObj])