Пример #1
0
def get_stats():
	"""Fetch NBA player stats with worker processes and save them to a file.

	Players are queued by ``queue_players``; batches of up to ``NUM_THREADS``
	worker processes run ``get_stats_helper(in_queue, out_queue)`` until the
	input queue is empty.  Every result dict that has a ``NAME`` key is
	stamped with ``RUNDAY`` and written as one '|'-delimited row to
	``RUNDAY + '_nba_stats.csv'``.

	NOTE(review): workers are joined before ``out_queue`` is drained; if
	``Queue`` is ``multiprocessing.Queue`` this can block when a worker still
	holds buffered results -- confirm result sizes stay small.
	"""
	print('Fetching NBA player stats...')
	stats_outfile = RUNDAY+'_nba_stats.csv'

	NUM_THREADS = 8
	# Column order of the output file; built once instead of once per row.
	fieldnames = [
		'TIME',
		'NAME',
		'JERSEY',
		'SPORT',
		'TEAM',
		'POSITION',
		'PTS',
		'REB',
		'AST',
		'URL',
	]

	in_queue = Queue()
	out_queue = Queue()
	queue_players(in_queue)

	# 'wb' is the mode the csv module expects on Python 2; the context manager
	# closes the file even when a worker result makes writerow() raise.
	with open(stats_outfile, 'wb') as csvout:
		csvwriter = csv.DictWriter(csvout, delimiter='|', fieldnames=fieldnames)

		while not in_queue.empty():
			jobs = []
			for _ in range(NUM_THREADS):
				if not in_queue.empty():
					worker = Process(target=get_stats_helper, args=(in_queue, out_queue))
					jobs.append(worker)
					worker.start()
			for worker in jobs:
				worker.join()

			while not out_queue.empty():
				player = out_queue.get()
				del player['SUCCESS']
				if 'NAME' not in player:
					# Results without a name are dropped.
					continue
				player['TIME'] = RUNDAY
				csvwriter.writerow(player)

	print('Finished fetching NBA player stats.')
	print('Ouput saved in %s' % stats_outfile)
Пример #2
0
def find_words(start_words,
               center_words=None,
               neg_words=None,
               min_sim=0.6,
               max_sim=1.,
               alpha=0.25):
    """Grow a cluster of related words by breadth-first expansion.

    Starting from ``start_words``, every dequeued word is expanded with
    ``most_similar(word, center_vec, neg_vec)``; a neighbour joins the
    cluster (and the work queue) when its similarity reaches the current
    threshold and ``is_good`` accepts it.  The threshold rises from
    ``min_sim`` towards ``max_sim`` with the expansion depth ``idx``:
    ``min_sim + (max_sim - min_sim) * (1 - exp(-alpha * idx))``.

    :param start_words: seed words; they are always part of the result
    :param center_words: optional seed words averaged into the center vector
    :param neg_words: optional seed words averaged into the negative vector
    :param min_sim: similarity threshold at depth 0 (raised to at least 0.6
        when neither ``center_words`` nor ``neg_words`` is given)
    :param max_sim: limit the threshold approaches as depth grows
    :param alpha: rate at which the threshold approaches ``max_sim``
    :return: list of cluster words in order of discovery
    """
    if center_words is None and neg_words is None:
        min_sim = max(min_sim, 0.6)
    # The center vector is the mean of all center seed word vectors.
    center_vec = _mean_word_vector(center_words)
    # The negative vector is the mean of all negative seed word vectors.
    neg_vec = _mean_word_vector(neg_words)

    queue_count = 0
    task_count = 0
    cluster = []
    seen = set()  # O(1) membership companion of ``cluster``
    queue = Queue()
    for w in start_words:
        if w not in seen:
            seen.add(w)
            cluster.append(w)
            queue.put((0, w))
            queue_count += 1
    while not queue.empty():
        idx, word = queue.get()
        queue_count -= 1
        task_count += 1
        sims = most_similar(word, center_vec, neg_vec)
        min_sim_ = min_sim + (max_sim - min_sim) * (1 - np.exp(-alpha * idx))
        if task_count % 10 == 0:
            log = '%s in cluster, %s in queue, %s tasks done, %s min_sim' % (
                len(cluster), queue_count, task_count, min_sim_)
            print(log)
        for candidate, sim in sims:
            if sim < min_sim_ or candidate in seen:
                continue
            if is_good(candidate):  # is_good is a hand-written filter rule
                # The queue item must be one (depth, word) tuple; passing the
                # word as a second positional argument made it the ``block``
                # flag and broke the unpacking in queue.get() above.
                queue.put((idx + 1, candidate))
                seen.add(candidate)
                cluster.append(candidate)
                queue_count += 1
    return cluster


def _mean_word_vector(words):
    """Return the mean vector of the in-vocabulary ``words`` (zeros if none)."""
    total = np.zeros([word_size])
    known = 0
    for w in words or ():
        if w in word2vec.wv.vocab:
            total += word2vec[w]
            known += 1
    if known > 0:
        total /= known
    return total
Пример #3
0
def download_sifts():
    """Download all SIFTS xml files.

    First tries to fetch and unpack a single tarball (``sifts.tar``) from
    ``S3_BUCKET_URL`` when ``SIFTS_TARBALL`` is not present yet.  Then lists
    ``/pub/databases/msd/sifts/xml`` on ``ftp.ebi.ac.uk`` and hands every
    file name to 10 ``download_ftp`` workers through a queue, printing the
    progress once per second until the queue is empty.

    A ``KeyboardInterrupt`` while waiting kills the workers.
    """
    if not os.path.exists(SIFTS_TARBALL):
        try:
            urlretrieve(S3_BUCKET_URL.format('sifts.tar'), SIFTS_TARBALL)
        except (RuntimeError, OSError):
            # OSError covers urllib's URLError; without it a network failure
            # aborted the function instead of falling back to FTP.
            logging.warning('failed downloading sifts tarball')
        else:
            # the tarball contains directory data/sifts
            with tarfile.open(SIFTS_TARBALL) as tf:
                tf.extractall(BASEDIR)

    try:
        os.mkdir(SIFTS_FILE.format(''))
    except FileExistsError:
        pass

    # Leaving the ``with`` block sends QUIT, also when nlst() raises.
    with ftplib.FTP("ftp.ebi.ac.uk") as ftp:
        ftp.login()
        ftp.cwd('/pub/databases/msd/sifts/xml')
        filenames = ftp.nlst()  # get filenames within the directory

    filename_queue = Queue()
    for filename in filenames:
        filename_queue.put(filename)

    ftp_processes = [
        Process(target=download_ftp, args=(filename_queue, ))
        for _ in range(10)
    ]
    for process in ftp_processes:
        process.start()

    total = len(filenames)
    try:
        while not filename_queue.empty():
            print('{}/{} sifts downloaded'.format(
                total - filename_queue.qsize(), total),
                end='\r')
            time.sleep(1)

        for process in ftp_processes:
            process.join()
    except KeyboardInterrupt:
        # TODO kill not supported by multiprocessing.dummy..
        for process in ftp_processes:
            process.kill()
Пример #4
0
def stat_files():
	"""Count per-author statistics over the ``.py`` files of the entity tree.

	Walks the hard-coded ``custom_type`` directory, skips every directory
	whose path contains one of ``ignore_paths``, splits the remaining files
	into ``P_NUM`` contiguous sections and runs ``stat_file(section,
	res_queue)`` in one process per section.  The dicts found on
	``res_queue`` afterwards are summed per author and printed.

	NOTE(review): the processes are joined before ``res_queue`` is drained;
	with ``multiprocessing.Queue`` this can block if a worker still holds
	buffered results -- confirm result sizes stay small.
	"""
	all_files = []
	for root, dirs, files in os.walk('/home/gzguoyubo/mf/tw2/res/entities/custom_type'):
		if any(ig_path in root for ig_path in ignore_paths):
			continue
		for fname in files:
			if fname.endswith('.py'):
				all_files.append(join(root, fname))

	res_queue = Queue()
	processes = []
	for section in _split_sections(all_files, P_NUM):
		p = Process(target=stat_file, args=(section, res_queue))
		p.start()
		processes.append(p)

	for p in processes:
		p.join()

	total_stats = defaultdict(int)
	while not res_queue.empty():
		stat = res_queue.get()
		for author, cnt in stat.items():
			total_stats[author] += cnt

	print(total_stats)


def _split_sections(items, parts):
	"""Split ``items`` into ``parts`` contiguous slices covering every element.

	Slice ``i`` spans ``[i * n // parts, (i + 1) * n // parts)``, so the
	bounds of neighbouring slices always meet; the previous ``stop = -1`` for
	the last slice dropped the final item and the rounded-down slice length
	left gaps between slices.
	"""
	total = len(items)
	return [
		items[i * total // parts:(i + 1) * total // parts]
		for i in range(parts)
	]
Пример #5
0
def proxy_thr(fun):
    """
    Collect proxy addresses from every source page concurrently.

    :param fun: page handler, called as ``fun(q, url)`` for each URL returned
        by ``proxy_url_list()``; the strings it puts on ``q`` are split on
        newlines, one proxy per line
    :return: list of the unique, non-empty proxy strings (arbitrary order)
    """
    q = Queue()
    pool = Pool(40)

    for u in proxy_url_list():
        pool.apply_async(fun, (q, u))
    pool.close()
    pool.join()

    proxys = set()
    while not q.empty():
        proxys.update(q.get().split("\n"))
    # A trailing newline or blank line yields '', which is not a proxy.
    proxys.discard("")
    return list(proxys)
Пример #6
0
class mmonly:
    """Crawl gallery list pages, persist sub-page URLs and download images.

    State kept on the instance:
      * ``q1`` -- bounded queue of sub-page URLs found on list pages.
      * ``q2`` -- bounded queue of image URLs waiting to be downloaded.
      * ``lock`` -- held around some of the console writes of pool workers.
      * ``headers`` -- request headers; the User-Agent is re-rolled before
        every request.

    NOTE(review): every message is printed via
    ``.decode('utf-8').encode(type)``; ``type`` is presumably a module-level
    console-encoding name defined outside this class -- confirm.
    """

    def __init__(self):
        """Create queues and lock, the SQLite tables and the image directory."""
        self.ua = UserAgent()
        self.headers = {}
        self.q1 = Queue(300)
        self.q2 = Queue(1000)
        self.lock = Lock()
        self.main_page_urls = []
        self.subpageurls = []
        conn = sqlite3.connect('mmonly.db')
        conn.isolation_level = None
        try:
            conn.execute(
                '''create table subpageurl(url text primary key not null)''')
            conn.execute(
                '''create table imgurl(url text primary key not null)''')
        except (Exception) as e:
            # Reported only, e.g. when the tables already exist.
            print('创建表:{}'.format(e).decode('utf-8').encode(type))
        finally:
            conn.close()
        self.rootpath = os.getcwd().replace('\\', '/')
        self.path = os.path.join(self.rootpath, 'imges/')
        if not os.path.exists(self.path):
            os.mkdir(self.path)

    def get_mainpage_urls(self, inurl):
        """Fill ``self.main_page_urls`` with the URL of every list page.

        The page count is read from the href of the '末页' link under
        ``div #pageNum``; the page URLs are rebuilt from the first two
        ``_``-separated parts of ``inurl``.  Any failure is printed and
        swallowed.
        """
        self.headers['User-Agent'] = self.ua.random
        try:
            req = requests.get(inurl, headers=self.headers, timeout=10)
            req.encoding = 'gbk'
            content = pq(req.text)
            pgnum = None
            for ele in content('div #pageNum').children('a').items():
                if ele.text() == '末页':
                    pgnum = int(ele.attr('href').split('_')[-1].split('.')[0])
            if pgnum is None:
                # Used to surface as an UnboundLocalError further down.
                raise ValueError('last-page link not found')
            spurl = inurl.split('_')
            for i in range(1, pgnum + 1):
                self.main_page_urls.append('{}_{}_{}.html'.format(
                    spurl[0], spurl[1], str(i)))
            print('主页计算完毕!!'.decode('utf-8').encode(type))
        except (Exception) as e:
            # ``with`` releases the lock even if the print itself fails.
            with self.lock:
                print('主页读取错误:{}'.format(e).decode('utf-8').encode(type))
            return

    def get_subpage_urls(self, inurl):
        """Put the href of every link under ``div .ABox`` of ``inurl`` on ``q1``.

        Any failure is printed and swallowed.
        """
        self.headers['User-Agent'] = self.ua.random
        try:
            req = requests.get(inurl, headers=self.headers, timeout=10)
            req.encoding = 'gbk'
            content = pq(req.text)
            for ele in content('div .ABox').children('a').items():
                url = ele.attr('href')
                self.q1.put(url)
                print('取得子页面地址:{}'.format(url).decode('utf-8').encode(type))
        except (Exception) as e:
            with self.lock:
                print('遍历主页面读取错误:{}'.format(e).decode('utf-8').encode(type))
            return

    def savesuburl(self):
        """Drain ``q1`` and store the collected URLs in table ``subpageurl``.

        Reading stops once a ``get`` failed (20 s timeout) and the queue was
        found empty twice, two seconds apart.  Rows that cannot be inserted
        are reported and skipped.
        """
        while 1:
            try:
                suburl = self.q1.get(timeout=20)
                self.subpageurls.append(suburl)
                print('列表存入子页面:{}'.format(suburl).decode('utf-8').encode(type))
            except (Exception) as e:
                print('读取子页面url:{}'.format(e).decode('utf-8').encode(type))
                time.sleep(2)
                if self.q1.empty():
                    time.sleep(2)
                    if self.q1.empty():
                        break
        conn = sqlite3.connect('mmonly.db')
        try:
            cur = conn.cursor()
            time.sleep(4)
            print('开始将子页面url写入数据库'.decode('utf-8').encode(type))
            for suburl in self.subpageurls:
                try:
                    cur.execute('insert into subpageurl values(?)', (suburl, ))
                    print('写入:{}'.format(suburl).decode('utf-8').encode(type))
                except (Exception) as er:
                    print('写入数据库错误:{}'.format(er).decode('utf-8').encode(type))

            conn.commit()
        finally:
            # Close the connection even when a print or the commit raises.
            conn.close()
        print('写入完毕!!'.decode('utf-8').encode(type))

    def get_img_url(self, inurl):
        """Put the download link of every image page of ``inurl`` on ``q2``.

        The number of image pages is read from ``.totalpage``; page ``n`` is
        ``<inurl without extension>_<n>.html`` and its link is the href of
        ``.down-btn``.  A failing page is reported and skipped; a failure
        outside the per-page handling is reported and ends the method.
        """
        self.headers['User-Agent'] = self.ua.random
        try:
            req = requests.get(inurl, headers=self.headers, timeout=10)
            time.sleep(0.2)
            req.encoding = 'gbk'
            imgnum = int(pq(req.text)('.totalpage').text())
            urlsp = '.'.join(inurl.split('.')[:-1])
            for n in range(1, imgnum + 1):
                imgpage = '{}_{}.html'.format(urlsp, n)
                self.headers['User-Agent'] = self.ua.random
                try:
                    req = requests.get(imgpage,
                                       headers=self.headers,
                                       timeout=10)
                    time.sleep(0.3)
                    req.encoding = 'gbk'
                    imgurl = pq(req.text)('.down-btn').attr('href')
                    if not imgurl:
                        # A missing link must not reach download().
                        raise ValueError('download link not found')
                    self.q2.put(imgurl)
                    # Reported only on success; printing after the handler
                    # showed a stale URL (or raised) when the request failed.
                    print(
                        'get图片url:{}'.format(imgurl).decode('utf-8').encode(type))
                except (Exception) as ee:
                    print('get图片url错误:{}'.format(ee).decode('utf-8').encode(
                        type))
        except (Exception) as e:
            print('get图片页面地址错误:{}'.format(e).decode('utf-8').encode(type))
            return

    def download(self, inurl):
        """Download ``inurl`` into ``self.path`` unless the file already exists.

        The file name is the last two ``/``-separated parts of the URL joined
        together.  Download errors are printed and swallowed.
        """
        na = inurl.split('/')
        imgname = '{}{}'.format(na[-2], na[-1])
        imgpath = '{}{}'.format(self.path, imgname)
        if os.path.exists(imgpath):
            with self.lock:
                print('重复图片:{}'.format(imgname).decode('utf-8').encode(type))
            return
        self.headers['User-Agent'] = self.ua.random
        try:
            req = requests.get(inurl, headers=self.headers,
                               timeout=8).content
            with open(imgpath, 'wb') as f:
                f.write(req)
            with self.lock:
                print('下载图片:{}'.format(imgname).decode('utf-8').encode(type))
        except (Exception) as e:
            with self.lock:
                print('下载错误:{}'.format(e).decode('utf-8').encode(type))

    def run(self, inurl):
        """Interactive entry point.

        Choice 1 collects sub-page URLs starting from ``inurl`` and shows the
        menu again; choice 2 reads the stored sub-page URLs, resolves their
        image URLs and downloads them; anything else ends the program.
        """
        # SECURITY: eval() executes whatever is typed at the prompt; flagged
        # for review, intentionally left unchanged.
        ch = eval(
            input('输入1表示采集页面\n输入2表示下载图片\n输入3退出程序\n输入:'.decode('utf-8').encode(
                type)))
        if ch == 1:
            self.get_mainpage_urls(inurl)
            time.sleep(4)
            pool1 = Pool(20)
            for mainurl in self.main_page_urls:
                pool1.apply_async(self.get_subpage_urls, (mainurl, ))
            time.sleep(1)
            # Consumes q1 while the pool workers are still producing.
            self.savesuburl()
            pool1.close()
            pool1.join()
            print('子页面采集完毕!!!'.decode('utf-8').encode(type))
            # Show the menu again by recursing with a fixed start URL.
            self.run('http://www.mmonly.cc/mmtp/list_9_2.html')
        elif ch == 2:
            conn = sqlite3.connect('mmonly.db')
            cur = conn.cursor()

            pool2 = Pool(10)
            pool3 = Pool(30)
            cur.execute('select * from subpageurl')
            suburls = cur.fetchall()

            while 1:
                for nn in range(200):
                    try:
                        # Each row is a 1-tuple, so it serves directly as the
                        # argument tuple and as the SQL parameter tuple.
                        for i in suburls:
                            pool2.apply_async(self.get_img_url, i)
                            cur.execute('delete from subpageurl where url=?',
                                        i)

                        # Runs until q2.get() times out; the timeout lands in
                        # the handler below.
                        while 1:
                            img = self.q2.get(timeout=20)
                            pool3.apply_async(self.download, (img, ))
                    except (Exception) as e:
                        print('数据库读取子页面url:{}'.format(e).decode(
                            'utf-8').encode(type))
                        time.sleep(2)
                        if self.q2.empty():
                            time.sleep(2)
                            if self.q2.empty():
                                break

                # Persist the deletions, then re-read what is left to do.
                conn.commit()
                conn.close()
                conn = sqlite3.connect('mmonly.db')
                cur = conn.cursor()
                cur.execute('select * from subpageurl')
                suburls = cur.fetchall()
                time.sleep(2)
                if self.q2.empty():
                    time.sleep(2)
                    if self.q2.empty():
                        break
            pool3.close()
            pool2.close()
            pool3.join()
            pool2.join()
        else:
            print('结束程序!'.decode('utf-8').encode(type))
class tizyoutubeproxy(object):
    """A class that accesses YouTube, retrieves stream URLs and creates and manages
    a playback queue.

    Each entry of ``self.queue`` is a dict with the keys ``a`` (audio
    stream), ``v`` (video), ``i`` (video info) and ``q`` (slot of the entry
    in ``self.queue`` at the time it was added).  ``self.play_queue_order``
    maps a playback position (``self.queue_index``) to a slot of
    ``self.queue``.

    """
    def __init__(self):
        self.queue = list()
        self.queue_index = -1
        self.play_queue_order = list()
        self.play_modes = TizEnumeration(["NORMAL", "SHUFFLE"])
        self.current_play_mode = self.play_modes.NORMAL
        self.now_playing_stream = None
        # Create multiprocess queues
        self.task_queue = Queue()
        self.done_queue = Queue()
        # Workers
        self.workers = list()

    def set_play_mode(self, mode):
        """ Set the playback mode.

        :param mode: current valid values are "NORMAL" and "SHUFFLE"

        """
        self.current_play_mode = getattr(self.play_modes, mode)
        self.__update_play_queue_order()

    def enqueue_audio_stream(self, arg):
        """Add the audio stream of a YouTube video to the
        playback queue.

        :param arg: the value handed to ``pafy.new``; it is also stored as
        the entry's ``ytid``

        :raises ValueError: "Video not found" for any ValueError raised while
        resolving the video, including a missing WebM audio stream

        """
        logging.info('arg : %s', arg)
        try:

            yt_video = pafy.new(arg)
            yt_audio = yt_video.getbestaudio(preftype="webm")
            if not yt_audio:
                raise ValueError(str("No WebM audio stream for : %s" % arg))

            yt_info = VideoInfo(ytid=arg, title=yt_audio.title)
            self.add_to_playback_queue(audio=yt_audio,
                                       video=yt_video,
                                       info=yt_info)

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Video not found : %s" % arg))

    def enqueue_audio_playlist(self, arg):
        """Add all audio streams in a YouTube playlist to the playback queue.

        :param arg: a YouTube playlist id

        :raises ValueError: if nothing was added to the queue

        """
        logging.info('arg : %s', arg)
        try:
            count = len(self.queue)

            playlist = pafy.get_playlist2(arg)
            if len(playlist) > 0:
                for yt_video in playlist:
                    yt_info = VideoInfo(ytid=yt_video.videoid,
                                        title=yt_video.title)
                    self.add_to_playback_queue(video=yt_video, info=yt_info)

            if count == len(self.queue):
                raise ValueError

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Playlist not found : %s" % arg))

    def enqueue_audio_search(self, arg):
        """Search YouTube and add the audio streams to the
        playback queue.

        Result pages are followed through ``nextPageToken`` until more than
        100 tracks were queued or no further page exists.

        :param arg: a search string

        :raises ValueError: if the search raised a ValueError

        """
        logging.info('arg : %s', arg)
        try:
            query = generate_search_query(arg)
            wdata = pafy.call_gdata('search', query)

            count = 0
            while True:
                for track_info in get_tracks_from_json(wdata):
                    self.add_to_playback_queue(info=track_info)
                    count += 1

                if count > 100:
                    break
                if not wdata.get('nextPageToken'):
                    break
                query['pageToken'] = wdata['nextPageToken']
                wdata = pafy.call_gdata('search', query)

            self.__update_play_queue_order()

        except ValueError:
            # This is a plain video search; the message used to talk about
            # mixes (copied from the mix search).
            raise ValueError(str("Could not find any videos : %s" % arg))

    def enqueue_audio_mix(self, arg, feelinglucky=True):
        """Obtain a YouTube mix associated to a given video id or url and add all audio
        streams in the mix playlist to the playback queue.

        :param arg: a YouTube video id

        :param feelinglucky: If True, it will perform another YouTube search to find
        alternatives if the original mix cannot be found.

        :raises ValueError: if the mix added nothing, or if an IndexError was
        raised and ``feelinglucky`` is False

        """
        logging.info('arg : %s', arg)
        yt_video = None
        try:
            count = len(self.queue)

            yt_video = pafy.new(arg)
            playlist = yt_video.mix
            if len(playlist) > 0:
                # A separate loop name keeps ``yt_video`` pointing at the
                # requested video for the fallback below.
                for mix_video in playlist:
                    yt_info = VideoInfo(ytid=mix_video.videoid,
                                        title=mix_video.title)
                    self.add_to_playback_queue(video=mix_video, info=yt_info)

            if count == len(self.queue):
                raise ValueError

            self.__update_play_queue_order()

        except IndexError:
            if not feelinglucky:
                raise ValueError
            else:
                print_wrn("[YouTube] Could not find a mix for '{0}'. "\
                          "Searching YouTube instead. Feeling lucky?." \
                          .format(arg.encode('utf-8')))
                # yt_video is still None when pafy.new() itself failed.
                if yt_video is not None and yt_video.title:
                    self.enqueue_audio_search(yt_video.title)
                else:
                    self.enqueue_audio_stream(arg)

    def enqueue_audio_mix_search(self, arg):
        """Obtain a YouTube mix associated to a given textual search and add all the
        audio streams in the mix playlist to the playback queue.

        The mix of the first search result that has one is used.

        :param arg: a search string

        :raises ValueError: if no search result produced a mix

        """
        logging.info('arg : %s', arg)
        try:
            query = generate_search_query(arg)
            wdata = pafy.call_gdata('search', query)

            count = len(self.queue)
            for track_info in get_tracks_from_json(wdata):
                if track_info and track_info.ytid:
                    try:
                        self.enqueue_audio_mix(track_info.ytid,
                                               feelinglucky=False)
                        break
                    except ValueError:
                        logging.info(
                            'Could not find a mix. Trying another video')

            if count == len(self.queue):
                raise ValueError

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def current_audio_stream_title(self):
        """ Retrieve the current stream's title.

        """
        stream = self.now_playing_stream
        title = ''
        if stream:
            title = to_ascii(stream['a'].title).encode("utf-8")
        return title

    def current_audio_stream_author(self):
        """ Retrieve the current stream's author.

        """
        stream = self.now_playing_stream
        author = ''
        if stream:
            author = to_ascii(stream['v'].author).encode("utf-8")
        return author

    def current_audio_stream_file_size(self):
        """ Retrieve the current stream's file size.

        """
        stream = self.now_playing_stream
        size = 0
        if stream:
            size = stream['a'].get_filesize()
        return size

    def current_audio_stream_duration(self):
        """ Retrieve the current stream's duration.

        """
        stream = self.now_playing_stream
        duration = ''
        if stream:
            duration = to_ascii(stream['v'].duration).encode("utf-8")
        return duration

    def current_audio_stream_bitrate(self):
        """ Retrieve the current stream's bitrate.

        """
        stream = self.now_playing_stream
        bitrate = ''
        if stream:
            bitrate = stream['a'].bitrate
        return bitrate

    def current_audio_stream_view_count(self):
        """ Retrieve the current stream's view count.

        """
        stream = self.now_playing_stream
        viewcount = 0
        if stream:
            viewcount = stream['v'].viewcount
        return viewcount

    def current_audio_stream_description(self):
        """ Retrieve the current stream's description.

        """
        stream = self.now_playing_stream
        description = ''
        if stream:
            description = to_ascii(stream['v'].description).encode("utf-8")
        return description

    def current_audio_stream_file_extension(self):
        """ Retrieve the current stream's file extension.

        """
        stream = self.now_playing_stream
        file_extension = ''
        if stream:
            file_extension = to_ascii(stream['a'].extension).encode("utf-8")
        return file_extension

    def current_audio_stream_video_id(self):
        """ Retrieve the current stream's video id.

        """
        stream = self.now_playing_stream
        video_id = ''
        if stream:
            video_id = to_ascii(stream['i'].ytid).encode("utf-8")
        return video_id

    def current_audio_stream_published(self):
        """ Retrieve the current stream's upload date and time.

        Returns an empty string when nothing is playing, like the other
        string getters.

        """
        stream = self.now_playing_stream
        published = ''
        if stream:
            published = to_ascii(stream['v'].published).encode("utf-8")
        return published

    def current_audio_stream_queue_index_and_queue_length(self):
        """ Retrieve index in the queue (starting from 1) of the current stream and the
        length of the playback queue.

        """
        return self.queue_index + 1, len(self.queue)

    def clear_queue(self):
        """ Clears the playback queue.

        """
        self.queue = list()
        self.queue_index = -1
        self.play_queue_order = list()

    def remove_current_url(self):
        """Remove the currently active url from the playback queue.

        Does nothing when no position is active (``queue_index`` outside the
        queue).  Afterwards ``queue_index`` points just before the position
        that was removed, so ``next_url`` continues with its successor.

        NOTE(review): the ``q`` slot stored in the remaining entries, and in
        tasks already handed to the workers, is not renumbered here.

        """
        logging.info("")
        position = self.queue_index
        # The index used to be tested for truthiness, which skipped position
        # 0 and let -1 delete the last entry.
        if not 0 <= position < len(self.queue):
            return
        slot = position
        if position < len(self.play_queue_order):
            slot = self.play_queue_order[position]
        stream = self.queue[slot]
        print_nfo("[YouTube] [Stream] '{0}' removed." \
                  .format(to_ascii(stream['i'].title).encode("utf-8")))
        del self.queue[slot]
        self.queue_index -= 1
        self.__update_play_queue_order()

    def next_url(self):
        """ Retrieve the url of the next stream in the playback queue.

        Wraps around to the first position after the last one.  Returns an
        empty string when the queue is empty.

        """
        logging.info("")
        try:
            if len(self.queue):
                self.queue_index += 1
                if (self.queue_index < len(self.queue)) \
                   and (self.queue_index >= 0):
                    # Resolve the playback position to its queue slot so the
                    # play order (e.g. SHUFFLE) is actually honoured.
                    slot = self.play_queue_order[self.queue_index]
                    next_stream = self.queue[slot]
                    return self.__retrieve_stream_url(
                        next_stream, slot).rstrip()
                else:
                    self.queue_index = -1
                    return self.next_url()
            else:
                return ''
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("exception")
            return self.next_url()

    def prev_url(self):
        """ Retrieve the url of the previous stream in the playback queue.

        Wraps around to the last position before the first one.  Returns an
        empty string when the queue is empty.

        """
        logging.info("")
        try:
            if len(self.queue):
                self.queue_index -= 1
                if (self.queue_index < len(self.queue)) \
                   and (self.queue_index >= 0):
                    slot = self.play_queue_order[self.queue_index]
                    prev_stream = self.queue[slot]
                    return self.__retrieve_stream_url(
                        prev_stream, slot).rstrip()
                else:
                    self.queue_index = len(self.queue)
                    return self.prev_url()
            else:
                return ''
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("exception")
            return self.prev_url()

    def __update_play_queue_order(self):
        """ Update the queue playback order.

        A sequential order is applied if the current play mode is "NORMAL" or a
        random order if current play mode is "SHUFFLE".  The order is rebuilt
        on every call so that it always has one position per queued stream,
        also after streams were added or removed.

        """
        total_streams = len(self.queue)
        if not total_streams:
            self.play_queue_order = list()
            return
        # list(): random.shuffle needs a mutable sequence.
        order = list(range(total_streams))
        if self.current_play_mode == self.play_modes.SHUFFLE:
            random.shuffle(order)
        self.play_queue_order = order
        print_nfo("[YouTube] [Streams in queue] '{0}'." \
                  .format(total_streams))

    def __retrieve_stream_url(self, stream, queue_index):
        """ Retrieve a stream url

        Starts the worker processes on first use, merges the entries the
        workers have finished into ``self.queue`` and returns the audio url
        of the entry in slot ``queue_index``, resolving it here when the
        workers have not done so yet.

        :param stream: the entry the caller read from the queue; the entry is
        read again from ``self.queue`` after the merge, so this value is not
        used
        :param queue_index: slot in ``self.queue``

        :raises AttributeError: if no suitable audio stream is found

        """
        try:
            if not len(self.workers):
                for _ in range(WORKER_PROCESSES):
                    # start() returns None, so keep the Process object
                    # itself in the worker list.
                    proc = Process(target=obtain_stream,
                                   args=(self.task_queue, self.done_queue))
                    proc.start()
                    self.workers.append(proc)

            while not self.done_queue.empty():
                stream = self.done_queue.get()
                self.queue[stream['q']] = stream

            stream = self.queue[queue_index]
            if not stream.get('v') or not stream.get('a'):
                logging.info("ytid : %s", stream['i'].ytid)
                video = stream.get('v')
                if not video:
                    video = pafy.new(stream['i'].ytid)
                audio = video.getbestaudio(preftype="webm")
                if not audio:
                    logging.info("no suitable audio found")
                    raise AttributeError()
                stream.update({'a': audio, 'v': video})

            self.now_playing_stream = stream
            return stream['a'].url.encode("utf-8")

        except AttributeError:
            logging.info("Could not retrieve the stream url!")
            raise

    def add_to_playback_queue(self, audio=None, video=None, info=None):
        """ Add to the playback queue.

        The entry is appended to ``self.queue`` and a copy is put on the task
        queue; both carry the new slot as ``q``.

        """

        if audio:
            print_nfo("[YouTube] [Stream] '{0}' [{1}]." \
                      .format(to_ascii(audio.title).encode("utf-8"), \
                              to_ascii(audio.extension)))
        if info:
            print_nfo("[YouTube] [Stream] '{0}'." \
                      .format(to_ascii(info.title).encode("utf-8")))
        queue_index = len(self.queue)
        self.task_queue.put(dict(a=audio, v=video, i=info, q=queue_index))
        self.queue.append(dict(a=audio, v=video, i=info, q=queue_index))
Пример #8
0
class tizyoutubeproxy(object):
    """A class that accesses YouTube, retrieves stream URLs and creates and manages
    a playback queue.

    """
    def __init__(self, api_key=API_KEY):
        """Create an empty playback queue and register the API key with pafy.

        :param api_key: YouTube API key; an empty string falls back to API_KEY

        """
        # Playback state
        self.queue = []
        self.queue_index = -1
        self.play_queue_order = []
        self.play_modes = TizEnumeration(["NORMAL", "SHUFFLE"])
        self.current_play_mode = self.play_modes.NORMAL
        self.now_playing_stream = None
        # Multiprocess queues handed to the worker processes
        self.task_queue = Queue()
        self.done_queue = Queue()
        # Worker processes; filled in by _retrieve_stream_url
        self.workers = []
        if api_key != "":
            self.api_key = api_key
        else:
            self.api_key = API_KEY
        pafy.set_api_key(self.api_key)

    def set_play_mode(self, mode):
        """Select the playback mode and refresh the play order.

        :param mode: name of an attribute of self.play_modes; current valid
        values are "NORMAL" and "SHUFFLE"

        """
        selected_mode = getattr(self.play_modes, mode)
        self.current_play_mode = selected_mode
        self._update_play_queue_order()

    def enqueue_audio_stream(self, arg):
        """Look up a single YouTube video and queue its WebM audio stream.

        :param arg: value handed to the YouTube search; it is also stored as
        the ytid of the queued entry

        :raises ValueError: "Video not found : <arg>" whenever a ValueError is
        raised inside, including when no WebM audio stream is available

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio strean] : '{0}'. ".format(arg))

            cached_search = MEMORY.cache(run_youtube_search)
            video = cached_search(arg)
            audio = video.getbestaudio(preftype="webm")
            if not audio:
                raise ValueError(str("No WebM audio stream for : %s" % arg))

            self._add_to_playback_queue(
                audio=audio,
                video=video,
                info=VideoInfo(ytid=arg, title=audio.title),
            )
            self._update_play_queue_order()

        except ValueError:
            raise ValueError(str("Video not found : %s" % arg))

    def enqueue_audio_playlist(self, arg):
        """Queue every video found in a YouTube playlist.

        :param arg: a YouTube playlist id

        :raises ValueError: "Playlist not found : <arg>" when a ValueError is
        raised inside or when nothing was added to the queue

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio playlist] : '{0}'. ".format(arg))
            queued_before = len(self.queue)

            cached_search = MEMORY.cache(run_youtube_playlist_search)
            playlist = cached_search(arg)

            if len(playlist) > 0:
                for video in playlist:
                    video_info = VideoInfo(ytid=video.videoid,
                                           title=video.title)
                    self._add_to_playback_queue(video=video, info=video_info)

            # An unchanged queue length means that nothing was added.
            if len(self.queue) == queued_before:
                raise ValueError

            self._update_play_queue_order()

        except ValueError:
            raise ValueError(str("Playlist not found : %s" % arg))

    def enqueue_audio_search(self, arg):
        """Run a YouTube search and queue the tracks of the result pages.

        Result pages are followed through "nextPageToken" until more than 100
        tracks have been queued or no further page is announced.

        :param arg: a search string

        :raises ValueError: "Could not find any mixes : <arg>" when a
        ValueError is raised inside

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio search] : '{0}'. ".format(arg))
            cached_search = MEMORY.cache(run_youtube_data_search)
            query = generate_search_query(arg, self.api_key)
            page = cached_search("search", query)

            queued = 0
            while True:
                for track in get_tracks_from_json(page):
                    self._add_to_playback_queue(info=track)
                    queued += 1

                if queued > 100 or not page.get("nextPageToken"):
                    break
                query["pageToken"] = page["nextPageToken"]
                page = cached_search("search", query)

            self._update_play_queue_order()

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_mix(self, arg, feelinglucky=True):
        """Obtain a YouTube mix associated to a given video id or url and add all audio
        streams in the mix playlist to the playback queue.

        :param arg: a YouTube video id

        :param feelinglucky: If True, it will perform another YouTube search to find
        alternatives if the original mix cannot be found.

        :raises ValueError: when the mix adds nothing to the queue, or when an
        IndexError occurs and feelinglucky is False

        """
        logging.info("arg : %s", arg)
        # Stays None if the lookup itself fails; the fallback below checks it.
        seed_video = None
        try:
            print_msg("[YouTube] [Audio mix] : '{0}'. ".format(arg))
            count = len(self.queue)

            yt_search = MEMORY.cache(run_youtube_search)
            seed_video = yt_search(arg)
            playlist = seed_video.mix
            if len(playlist) > 0:
                # A separate loop variable keeps seed_video pointing at the
                # video that was looked up, which the fallback relies on.
                for mix_video in playlist:
                    yt_info = VideoInfo(ytid=mix_video.videoid,
                                        title=mix_video.title)
                    self._add_to_playback_queue(video=mix_video, info=yt_info)

            # An unchanged queue length means that nothing was added.
            if count == len(self.queue):
                raise ValueError

            self._update_play_queue_order()

        except IndexError:
            if not feelinglucky:
                raise ValueError
            print_adv("[YouTube] Could not find a mix for '{0}'. "
                      "Searching YouTube instead. Feeling lucky?.".format(
                          arg.encode("utf-8")))
            # Without a seed video there is no title to search for, so fall
            # back to queueing the plain stream.
            if seed_video is not None and seed_video.title:
                self.enqueue_audio_search(seed_video.title)
            else:
                self.enqueue_audio_stream(arg)

    def enqueue_audio_mix_search(self, arg):
        """Search YouTube and queue the mix of the first result that has one.

        Each track of the search result is tried in turn with
        enqueue_audio_mix(feelinglucky=False) until one call succeeds.

        :param arg: a search string

        :raises ValueError: "Could not find any mixes : <arg>" when a
        ValueError is raised inside or when nothing was added to the queue

        """
        logging.info("arg : %s", arg)
        try:
            print_msg("[YouTube] [Audio mix search] : '{0}'. ".format(arg))
            cached_search = MEMORY.cache(run_youtube_data_search)
            results = cached_search("search",
                                    generate_search_query(arg, self.api_key))

            queued_before = len(self.queue)
            for track in get_tracks_from_json(results):
                if not (track and track.ytid):
                    continue
                try:
                    self.enqueue_audio_mix(track.ytid, feelinglucky=False)
                except ValueError:
                    logging.info(
                        "Could not find a mix. Trying another video")
                else:
                    # The first mix that could be queued is enough.
                    break

            if len(self.queue) == queued_before:
                raise ValueError

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_channel_uploads(self, arg):
        """Queue every upload of a YouTube channel.

        :param arg: a YouTube channel url

        :raises ValueError: "Channel not found : <arg>" when a ValueError is
        raised inside or when nothing was added to the queue

        """
        logging.info("arg : %s", arg)
        try:
            print_msg(
                "[YouTube] [Audio channel uploads] : '{0}'. ".format(arg))
            queued_before = len(self.queue)

            cached_search = MEMORY.cache(run_youtube_channel_search)
            channel = cached_search(arg)
            if channel:
                for video in channel.uploads:
                    video_info = VideoInfo(ytid=video.videoid,
                                           title=video.title)
                    self._add_to_playback_queue(video=video, info=video_info)

            # An unchanged queue length means that nothing was added.
            if len(self.queue) == queued_before:
                raise ValueError

            self._update_play_queue_order()

        except ValueError:
            raise ValueError(str("Channel not found : %s" % arg))

    def enqueue_audio_channel_playlist(self, channel_name, playlist_name):
        """Find a playlist inside a channel by fuzzy title match and queue it.

        Playlists whose title scores above 50 with fuzz.partial_ratio are
        candidates; with several candidates process.extractOne picks one.

        :param channel_name: value handed to the YouTube channel search
        :param playlist_name: playlist title to look for

        :raises ValueError: "Channel not found : <channel_name>" when a
        ValueError is raised inside or when nothing was added to the queue

        """
        logging.info("args : %s - %s", channel_name, playlist_name)
        try:
            print_msg(
                "[YouTube] [Audio channel playlist] : '{0} - {1}'. ".format(
                    channel_name, playlist_name))
            queued_before = len(self.queue)
            cached_search = MEMORY.cache(run_youtube_channel_search)
            channel = cached_search(channel_name)

            if channel:
                candidates = {}
                titles = []
                for item in channel.playlists:
                    print_nfo("[YouTube] [Playlist] '{0}'.".format(
                        to_ascii(item.title)))
                    if fuzz.partial_ratio(playlist_name, item.title) > 50:
                        candidates[item.title] = item
                        titles.append(item.title)

                chosen_title = ""
                if len(titles) == 1:
                    chosen_title = titles[0]
                elif len(titles) > 1:
                    chosen_title = process.extractOne(playlist_name,
                                                      titles)[0]

                if chosen_title:
                    if chosen_title.lower() != playlist_name.lower():
                        print_adv("[YouTube] Playlist '{0}' not found. "
                                  "Playing '{1}' instead.".format(
                                      to_ascii(playlist_name),
                                      to_ascii(chosen_title)))
                    for video in candidates[chosen_title]:
                        video_info = VideoInfo(ytid=video.videoid,
                                               title=video.title)
                        self._add_to_playback_queue(video=video,
                                                    info=video_info)

            # An unchanged queue length means that nothing was added.
            if len(self.queue) == queued_before:
                raise ValueError

            self._update_play_queue_order()

        except ValueError:
            raise ValueError(str("Channel not found : %s" % channel_name))

    def current_audio_stream_title(self):
        """Return the title of the stream that is now playing.

        The title is read from the entry's audio object ('a') and passed
        through to_ascii(). Without a current stream the result is "".

        """
        playing = self.now_playing_stream
        if not playing:
            return ""
        return to_ascii(playing["a"].title)

    def current_audio_stream_author(self):
        """Return the author of the stream that is now playing.

        The author is read from the entry's video object ('v') and passed
        through to_ascii(). Without a current stream the result is "".

        """
        playing = self.now_playing_stream
        if not playing:
            return ""
        return to_ascii(playing["v"].author)

    def current_audio_stream_file_size(self):
        """Return the file size of the stream that is now playing.

        The value comes from get_filesize() of the entry's audio object
        ('a'). Without a current stream the result is 0.

        """
        playing = self.now_playing_stream
        if not playing:
            return 0
        return playing["a"].get_filesize()

    def current_audio_stream_duration(self):
        """Return the duration of the stream that is now playing.

        The duration is read from the entry's video object ('v') and passed
        through to_ascii(). Without a current stream the result is "".

        """
        playing = self.now_playing_stream
        if not playing:
            return ""
        return to_ascii(playing["v"].duration)

    def current_audio_stream_bitrate(self):
        """Return the bitrate of the stream that is now playing.

        The value is the 'bitrate' attribute of the entry's audio object
        ('a'), returned as is. Without a current stream the result is "".

        """
        playing = self.now_playing_stream
        if not playing:
            return ""
        return playing["a"].bitrate

    def current_audio_stream_view_count(self):
        """Return the view count of the stream that is now playing.

        The value is the 'viewcount' attribute of the entry's video object
        ('v'). Without a current stream the result is 0.

        """
        playing = self.now_playing_stream
        if not playing:
            return 0
        return playing["v"].viewcount

    def current_audio_stream_description(self):
        """Return the description of the stream that is now playing.

        The description is read from the entry's video object ('v') and
        passed through to_ascii(). Without a current stream the result is "".

        """
        playing = self.now_playing_stream
        if not playing:
            return ""
        return to_ascii(playing["v"].description)

    def current_audio_stream_file_extension(self):
        """Return the file extension of the stream that is now playing.

        The extension is read from the entry's audio object ('a') and passed
        through to_ascii(). Without a current stream the result is "".

        """
        playing = self.now_playing_stream
        if not playing:
            return ""
        return to_ascii(playing["a"].extension)

    def current_audio_stream_video_id(self):
        """Return the video id of the stream that is now playing.

        The id is the 'ytid' attribute of the entry's info object ('i'),
        passed through to_ascii(). Without a current stream the result is "".

        """
        playing = self.now_playing_stream
        if not playing:
            return ""
        return to_ascii(playing["i"].ytid)

    def current_audio_stream_published(self):
        """ Retrieve the current stream's upload date and time.

        The value is the 'published' attribute of the entry's video object
        ('v'), passed through to_ascii(). Without a current stream the result
        is "", like the other string getters of this class.

        """
        stream = self.now_playing_stream
        # Default needed: without it the return below raised
        # UnboundLocalError whenever no stream was playing.
        published = ""
        if stream:
            published = to_ascii(stream["v"].published)
        return published

    def current_audio_stream_queue_index_and_queue_length(self):
        """Return the 1-based queue position of the current stream and the
        number of entries in the playback queue, as a tuple.

        The position is looked up in self.play_queue_order at
        self.queue_index.

        """
        position = self.play_queue_order[self.queue_index] + 1
        return position, len(self.queue)

    def clear_queue(self):
        """ Clears the playback queue.

        The queue, the play order and the current position are all reset.

        """
        self.queue = list()
        # The play order holds indices into the queue; keeping it after the
        # queue is emptied would leave indices that point past the entries
        # enqueued afterwards.
        self.play_queue_order = list()
        self.queue_index = -1

    def remove_current_url(self):
        """Delete the entry at self.queue_index from the playback queue.

        Nothing happens when the queue is empty or self.queue_index is 0.
        After the removal the index moves back by one (never below 0) and the
        play order is refreshed.

        """
        logging.info("")
        # NOTE(review): the index is tested for truthiness, so index 0 is
        # never removed while -1 is accepted - confirm this is intended.
        if not len(self.queue) or not self.queue_index:
            return
        removed = self.queue[self.queue_index]
        print_nfo("[YouTube] [Stream] '{0}' removed.".format(
            to_ascii(removed["i"].title)))
        del self.queue[self.queue_index]
        self.queue_index = max(self.queue_index - 1, 0)
        self._update_play_queue_order()

    def next_url(self):
        """Advance in the play order and return that stream's url.

        Returns "" when the queue is empty. When the position runs past the
        end of the queue it wraps around and the search starts over.

        """
        logging.info("")
        try:
            if not len(self.queue):
                return ""
            self.queue_index += 1
            if not (0 <= self.queue_index < len(self.queue)):
                # Out of range: rewind and try again from the start.
                self.queue_index = -1
                return self.next_url()
            order_index = self.play_queue_order[self.queue_index]
            next_stream = self.queue[order_index]
            return self._retrieve_stream_url(next_stream,
                                             order_index).rstrip()
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("KeyError, or AttributeError exception")
            return self.next_url()
        except (IOError):
            # Remove this video
            del self.queue[self.queue_index]
            logging.info("IOError exception")
            return self.next_url()

    def prev_url(self):
        """Step back in the play order and return that stream's url.

        Returns "" when the queue is empty. When the position runs below the
        start of the queue it wraps around to the end.

        """
        logging.info("")
        try:
            if not len(self.queue):
                return ""
            self.queue_index -= 1
            if not (0 <= self.queue_index < len(self.queue)):
                # Out of range: jump past the end and step back again.
                self.queue_index = len(self.queue)
                return self.prev_url()
            order_index = self.play_queue_order[self.queue_index]
            prev_stream = self.queue[order_index]
            return self._retrieve_stream_url(prev_stream,
                                             order_index).rstrip()
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            # del self.queue[self.queue_index]
            logging.info("exception")
            return self.prev_url()
        except (IOError):
            # Remove this video
            del self.queue[self.queue_index]
            logging.info("IOError exception")
            # NOTE(review): continues forward with next_url() rather than
            # prev_url() - confirm this is intended.
            return self.next_url()

    def _update_play_queue_order(self):
        """ Update the queue playback order.

        The play order is kept the same length as the queue: positions of
        newly queued streams are appended, and an order left over from a
        longer queue is rebuilt sequentially. The order is then shuffled if
        the current play mode is "SHUFFLE". Nothing happens while the queue
        is empty.

        """
        total_streams = len(self.queue)
        if not total_streams:
            return

        known_streams = len(self.play_queue_order)
        if known_streams > total_streams:
            # Entries were removed: the old order may hold indices that no
            # longer exist, so start again from a sequential order.
            self.play_queue_order = list(range(total_streams))
        elif known_streams < total_streams:
            # First call, or the queue grew: add the new positions. The order
            # used to be created only when empty, which left later additions
            # out of it.
            self.play_queue_order.extend(range(known_streams, total_streams))

        if self.current_play_mode == self.play_modes.SHUFFLE:
            random.shuffle(self.play_queue_order)
        print_nfo(
            "[YouTube] [Streams in queue] '{0}'.".format(total_streams))

    def _retrieve_stream_url(self, stream, queue_index):
        """ Retrieve a stream url

        On the first call WORKER_PROCESSES processes running obtain_stream are
        started with the task and done queues. Entries waiting on the done
        queue replace the matching entries of self.queue. If the requested
        entry still lacks its video or audio object, they are looked up here.

        :param stream: not read; the entry is taken from self.queue instead
        :param queue_index: index into self.queue of the entry to resolve

        :returns: the url of the entry's audio object

        :raises AttributeError: when no suitable audio is found (also
        re-raised when it occurs anywhere else inside)

        """
        try:
            if not len(self.workers):
                for _ in range(WORKER_PROCESSES):
                    proc = Process(target=obtain_stream,
                                   args=(self.task_queue, self.done_queue))
                    # start() returns None, so it must not be chained onto
                    # the constructor if the Process object is to be kept.
                    proc.start()
                    self.workers.append(proc)

            # Take over whatever the workers have finished so far.
            while not self.done_queue.empty():
                stream = self.done_queue.get()
                self.queue[stream["q"]] = stream

            stream = self.queue[queue_index]
            if not stream.get("v") or not stream.get("a"):
                logging.info("ytid : %s", stream["i"].ytid)
                video = stream.get("v")
                if not video:
                    yt_search = MEMORY.cache(run_youtube_search)
                    video = yt_search(stream["i"].ytid)
                audio = video.getbestaudio(preftype="webm")
                if not audio:
                    logging.info("no suitable audio found")
                    raise AttributeError()
                stream.update({"a": audio, "v": video})

            self.now_playing_stream = stream
            return stream["a"].url

        except AttributeError:
            logging.info("Could not retrieve the stream url!")
            raise

    def _add_to_playback_queue(self, audio=None, video=None, info=None):
        """Append a new entry to the playback queue.

        The entry is a dict with the keys 'a' (audio), 'v' (video), 'i' (info)
        and 'q' (the entry's position in self.queue). One dict is appended to
        self.queue and an equal, separate dict is put on self.task_queue.

        :param audio: audio object exposing 'title' and 'extension', or None
        :param video: video object, or None
        :param info: object exposing 'title', or None

        """
        if audio:
            audio_title = to_ascii(audio.title)
            audio_extension = to_ascii(audio.extension)
            print_nfo("[YouTube] [Stream] '{0}' [{1}].".format(
                audio_title, audio_extension))
        if info:
            info_title = to_ascii(info.title)
            print_nfo("[YouTube] [Stream] '{0}'.".format(info_title))

        position = len(self.queue)
        entry = {"a": audio, "v": video, "i": info, "q": position}
        # The task queue gets its own copy, as two distinct dicts were built
        # before.
        self.task_queue.put(dict(entry))
        self.queue.append(entry)
Пример #9
0
class tizyoutubeproxy(object):
    """A class that accesses YouTube, retrieves stream URLs and creates and manages
    a playback queue.

    """

    def __init__(self):
        """Create an empty playback queue in NORMAL play mode."""
        # Playback state
        self.queue = []
        self.queue_index = -1
        self.play_queue_order = []
        self.play_modes = TizEnumeration(["NORMAL", "SHUFFLE"])
        self.current_play_mode = self.play_modes.NORMAL
        self.now_playing_stream = None
        # Multiprocess queues
        self.task_queue = Queue()
        self.done_queue = Queue()
        # Worker list, initially empty
        self.workers = []

    def set_play_mode(self, mode):
        """Select the playback mode and refresh the play order.

        :param mode: name of an attribute of self.play_modes; current valid
        values are "NORMAL" and "SHUFFLE"

        """
        selected_mode = getattr(self.play_modes, mode)
        self.current_play_mode = selected_mode
        self.__update_play_queue_order()

    def enqueue_audio_stream(self, arg):
        """Look up a single YouTube video and queue its WebM audio stream.

        :param arg: value handed to pafy.new(); it is also stored as the ytid
        of the queued entry

        :raises ValueError: "Video not found : <arg>" whenever a ValueError is
        raised inside, including when no WebM audio stream is available

        """
        logging.info('arg : %s', arg)
        try:
            video = pafy.new(arg)
            audio = video.getbestaudio(preftype="webm")
            if not audio:
                raise ValueError(str("No WebM audio stream for : %s" % arg))

            self.add_to_playback_queue(
                audio=audio,
                video=video,
                info=VideoInfo(ytid=arg, title=audio.title))
            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Video not found : %s" % arg))

    def enqueue_audio_playlist(self, arg):
        """Queue every video found in a YouTube playlist.

        :param arg: a YouTube playlist id

        :raises ValueError: "Playlist not found : <arg>" when a ValueError is
        raised inside or when nothing was added to the queue

        """
        logging.info('arg : %s', arg)
        try:
            queued_before = len(self.queue)

            playlist = pafy.get_playlist2(arg)
            if len(playlist) > 0:
                for video in playlist:
                    video_info = VideoInfo(ytid=video.videoid,
                                           title=video.title)
                    self.add_to_playback_queue(video=video, info=video_info)

            # An unchanged queue length means that nothing was added.
            if len(self.queue) == queued_before:
                raise ValueError

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Playlist not found : %s" % arg))

    def enqueue_audio_search(self, arg):
        """Run a YouTube search and queue the tracks of the result pages.

        Result pages are followed through 'nextPageToken' until more than 100
        tracks have been queued or no further page is announced.

        :param arg: a search string

        :raises ValueError: "Could not find any mixes : <arg>" when a
        ValueError is raised inside

        """
        logging.info('arg : %s', arg)
        try:
            query = generate_search_query(arg)
            page = pafy.call_gdata('search', query)

            queued = 0
            while True:
                for track in get_tracks_from_json(page):
                    self.add_to_playback_queue(info=track)
                    queued += 1

                if queued > 100 or not page.get('nextPageToken'):
                    break
                query['pageToken'] = page['nextPageToken']
                page = pafy.call_gdata('search', query)

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_mix(self, arg, feelinglucky=True):
        """Obtain a YouTube mix associated to a given video id or url and add all audio
        streams in the mix playlist to the playback queue.

        :param arg: a YouTube video id

        :param feelinglucky: If True, it will perform another YouTube search to find
        alternatives if the original mix cannot be found.

        :raises ValueError: when the mix adds nothing to the queue, or when an
        IndexError occurs and feelinglucky is False

        """
        logging.info('arg : %s', arg)
        # Stays None if pafy.new() itself fails; the fallback below checks it.
        seed_video = None
        try:
            count = len(self.queue)

            seed_video = pafy.new(arg)
            playlist = seed_video.mix
            if len(playlist) > 0:
                # A separate loop variable keeps seed_video pointing at the
                # video that was looked up, which the fallback relies on.
                for mix_video in playlist:
                    yt_info = VideoInfo(ytid=mix_video.videoid,
                                        title=mix_video.title)
                    self.add_to_playback_queue(video=mix_video, info=yt_info)

            # An unchanged queue length means that nothing was added.
            if count == len(self.queue):
                raise ValueError

            self.__update_play_queue_order()

        except IndexError:
            if not feelinglucky:
                raise ValueError
            print_wrn("[YouTube] Could not find a mix for '{0}'. "
                      "Searching YouTube instead. Feeling lucky?."
                      .format(arg.encode('utf-8')))
            # Without a seed video there is no title to search for, so fall
            # back to queueing the plain stream.
            if seed_video is not None and seed_video.title:
                self.enqueue_audio_search(seed_video.title)
            else:
                self.enqueue_audio_stream(arg)

    def enqueue_audio_mix_search(self, arg):
        """Search YouTube and queue the mix of the first result that has one.

        Each track of the search result is tried in turn with
        enqueue_audio_mix(feelinglucky=False) until one call succeeds.

        :param arg: a search string

        :raises ValueError: "Could not find any mixes : <arg>" when a
        ValueError is raised inside or when nothing was added to the queue

        """
        logging.info('arg : %s', arg)
        try:
            results = pafy.call_gdata('search', generate_search_query(arg))

            queued_before = len(self.queue)
            for track in get_tracks_from_json(results):
                if not (track and track.ytid):
                    continue
                try:
                    self.enqueue_audio_mix(track.ytid, feelinglucky=False)
                except ValueError:
                    logging.info('Could not find a mix. Trying another video')
                else:
                    # The first mix that could be queued is enough.
                    break

            if len(self.queue) == queued_before:
                raise ValueError

        except ValueError:
            raise ValueError(str("Could not find any mixes : %s" % arg))

    def enqueue_audio_channel_uploads(self, arg):
        """Queue every upload of a YouTube channel.

        :param arg: a YouTube channel url

        :raises ValueError: "Channel not found : <arg>" when a ValueError is
        raised inside or when nothing was added to the queue

        """
        logging.info('arg : %s', arg)
        try:
            queued_before = len(self.queue)

            channel = pafy.get_channel(arg)
            if channel:
                for video in channel.uploads:
                    video_info = VideoInfo(ytid=video.videoid,
                                           title=video.title)
                    self.add_to_playback_queue(video=video, info=video_info)

            # An unchanged queue length means that nothing was added.
            if len(self.queue) == queued_before:
                raise ValueError

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Channel not found : %s" % arg))

    def enqueue_audio_channel_playlist(self, channel_name, playlist_name):
        """Find a playlist inside a channel by fuzzy title match and queue it.

        Playlists whose title scores above 50 with fuzz.partial_ratio are
        candidates; with several candidates process.extractOne picks one.

        :param channel_name: value handed to pafy.get_channel()
        :param playlist_name: playlist title to look for

        :raises ValueError: "Channel not found : <channel_name>" when a
        ValueError is raised inside or when nothing was added to the queue

        """
        logging.info('args : %s - %s', channel_name, playlist_name)
        try:
            queued_before = len(self.queue)
            channel = pafy.get_channel(channel_name)
            if channel:
                candidates = {}
                titles = []
                for item in channel.playlists:
                    print_nfo("[YouTube] [Playlist] '{0}'."
                              .format(to_ascii(item.title)))
                    if fuzz.partial_ratio(playlist_name, item.title) > 50:
                        candidates[item.title] = item
                        titles.append(item.title)

                chosen_title = ''
                if len(titles) == 1:
                    chosen_title = titles[0]
                elif len(titles) > 1:
                    chosen_title = process.extractOne(playlist_name,
                                                      titles)[0]

                if chosen_title:
                    if chosen_title.lower() != playlist_name.lower():
                        print_wrn("[YouTube] Playlist '{0}' not found. "
                                  "Playing '{1}' instead."
                                  .format(to_ascii(playlist_name),
                                          to_ascii(chosen_title)))
                    for video in candidates[chosen_title]:
                        video_info = VideoInfo(ytid=video.videoid,
                                               title=video.title)
                        self.add_to_playback_queue(video=video,
                                                   info=video_info)

            # An unchanged queue length means that nothing was added.
            if len(self.queue) == queued_before:
                raise ValueError

            self.__update_play_queue_order()

        except ValueError:
            raise ValueError(str("Channel not found : %s" % channel_name))

    def current_audio_stream_title(self):
        """ Retrieve the current stream's title.

        """
        stream = self.now_playing_stream
        title = ''
        if stream:
            title = to_ascii(stream['a'].title).encode("utf-8")
        return title

    def current_audio_stream_author(self):
        """ Retrieve the current stream's author.

        """
        stream = self.now_playing_stream
        author = ''
        if stream:
            author = to_ascii(stream['v'].author).encode("utf-8")
        return author

    def current_audio_stream_file_size(self):
        """ Retrieve the current stream's file size.

        """
        stream = self.now_playing_stream
        size = 0
        if stream:
            size = stream['a'].get_filesize()
        return size

    def current_audio_stream_duration(self):
        """ Retrieve the current stream's duration.

        """
        stream = self.now_playing_stream
        duration = ''
        if stream:
            duration = to_ascii(stream['v'].duration).encode("utf-8")
        return duration

    def current_audio_stream_bitrate(self):
        """ Retrieve the current stream's bitrate.

        """
        stream = self.now_playing_stream
        bitrate = ''
        if stream:
            bitrate = stream['a'].bitrate
        return bitrate

    def current_audio_stream_view_count(self):
        """ Retrieve the current stream's view count.

        """
        stream = self.now_playing_stream
        viewcount = 0
        if stream:
            viewcount = stream['v'].viewcount
        return viewcount

    def current_audio_stream_description(self):
        """ Retrieve the current stream's description.

        """
        stream = self.now_playing_stream
        description = ''
        if stream:
            description = to_ascii(stream['v'].description).encode("utf-8")
        return description

    def current_audio_stream_file_extension(self):
        """ Retrieve the current stream's file extension.

        """
        stream = self.now_playing_stream
        file_extension = ''
        if stream:
            file_extension = to_ascii(stream['a'].extension).encode("utf-8")
        return file_extension

    def current_audio_stream_video_id(self):
        """ Retrieve the current stream's video id.

        """
        stream = self.now_playing_stream
        video_id = ''
        if stream:
            video_id = to_ascii(stream['i'].ytid).encode("utf-8")
        return video_id

    def current_audio_stream_published(self):
        """ Retrieve the current stream's upload date and time.

        """
        stream = self.now_playing_stream
        if stream:
            published = to_ascii(stream['v'].published).encode("utf-8")
        return published

    def current_audio_stream_queue_index_and_queue_length(self):
        """ Retrieve index in the queue (starting from 1) of the current stream and the
        length of the playback queue.

        """
        return self.queue_index + 1, len(self.queue)

    def clear_queue(self):
        """ Clears the playback queue.

        """
        self.queue = list()
        self.queue_index = -1

    def remove_current_url(self):
        """Remove the currently active url from the playback queue.

        """
        logging.info("")
        if len(self.queue) and self.queue_index:
            stream = self.queue[self.queue_index]
            print_nfo("[YouTube] [Stream] '{0}' removed." \
                      .format(to_ascii(stream['i'].title).encode("utf-8")))
            del self.queue[self.queue_index]
            self.queue_index -= 1
            if self.queue_index < 0:
                self.queue_index = 0
            self.__update_play_queue_order()

    def next_url(self):
        """ Retrieve the url of the next stream in the playback queue.

        Advances ``self.queue_index`` by one, wrapping around to the start of
        the queue once the end is passed, and returns the stream url with
        trailing whitespace stripped. Returns an empty string when the queue
        is empty.

        A stream that fails with KeyError or AttributeError is skipped but
        kept in the queue; a stream that fails with IOError is removed from
        the queue before moving on.

        """
        logging.info("")
        try:
            if not len(self.queue):
                return ''

            self.queue_index += 1
            if 0 <= self.queue_index < len(self.queue):
                # NOTE(review): the stream is looked up through
                # play_queue_order, but the plain playback position is what
                # gets passed as the queue index - confirm this is intended.
                next_stream = self.queue[self.play_queue_order \
                                        [self.queue_index]]
                return self.__retrieve_stream_url(next_stream, self.queue_index).rstrip()

            # Ran past the end: wrap around and start again from the head.
            self.queue_index = -1
            return self.next_url()
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            logging.info("KeyError, or AttributeError exception")
            return self.next_url()
        except (IOError):
            # Remove this video
            del self.queue[self.queue_index]
            # The following entry has just slid into the freed slot; step
            # back so that the recursive call lands on it instead of
            # skipping it.
            self.queue_index -= 1
            logging.info("IOError exception")
            return self.next_url()

    def prev_url(self):
        """ Retrieve the url of the previous stream in the playback queue.

        Moves ``self.queue_index`` back by one, wrapping around to the end of
        the queue once the start is passed, and returns the stream url with
        trailing whitespace stripped. Returns an empty string when the queue
        is empty.

        A stream that fails with KeyError or AttributeError is skipped but
        kept in the queue; a stream that fails with IOError is removed from
        the queue before moving on.

        """
        logging.info("")
        try:
            if not len(self.queue):
                return ''

            self.queue_index -= 1
            if 0 <= self.queue_index < len(self.queue):
                # NOTE(review): the stream is looked up through
                # play_queue_order, but the plain playback position is what
                # gets passed as the queue index - confirm this is intended.
                prev_stream = self.queue[self.play_queue_order \
                                        [self.queue_index]]
                return self.__retrieve_stream_url(prev_stream, self.queue_index).rstrip()

            # Ran past the start: wrap around and continue from the tail.
            self.queue_index = len(self.queue)
            return self.prev_url()
        except (KeyError, AttributeError):
            # TODO: We don't remove this for now
            logging.info("exception")
            return self.prev_url()
        except (IOError):
            # Remove this video
            del self.queue[self.queue_index]
            logging.info("IOError exception")
            # Keep walking backwards. This used to call next_url(), which
            # reversed direction and returned the stream we came from.
            return self.prev_url()

    def __update_play_queue_order(self):
        """ Update the queue playback order.

        A sequential order is applied if the current play mode is "NORMAL" or a
        random order if current play mode is "SHUFFLE".

        ``self.play_queue_order`` is rebuilt as ``0 .. len(self.queue) - 1``
        whenever its length does not match the queue, then shuffled in place
        in SHUFFLE mode. Nothing is done when the queue is empty.

        """
        total_streams = len(self.queue)
        if total_streams:
            if len(self.play_queue_order) != total_streams:
                # (Re)create a sequential play order when it is empty or has
                # gone stale because streams were added or removed. A real
                # list is needed: random.shuffle() cannot shuffle a Python 3
                # range object.
                self.play_queue_order = list(range(total_streams))
            if self.current_play_mode == self.play_modes.SHUFFLE:
                random.shuffle(self.play_queue_order)
            print_nfo("[YouTube] [Streams in queue] '{0}'." \
                      .format(total_streams))

    def __retrieve_stream_url(self, stream, queue_index):
        """ Retrieve a stream url

        On first use, starts ``WORKER_PROCESSES`` worker processes running
        ``obtain_stream`` on ``self.task_queue`` / ``self.done_queue``. Any
        results already waiting in ``self.done_queue`` are copied into
        ``self.queue`` at the position stored under their ``'q'`` key. If the
        entry at ``queue_index`` still lacks video or audio data, it is
        resolved here with ``pafy``.

        :param stream: not used as given; the entry is always re-read from
          ``self.queue[queue_index]``.
        :param queue_index: index into ``self.queue`` of the stream to resolve.
        :return: the url of the selected audio stream, encoded as UTF-8. The
          entry also becomes ``self.now_playing_stream``.
        :raises AttributeError: when no suitable audio stream is found (or any
          other AttributeError occurs); it is logged and re-raised.

        """
        try:
            if not self.workers:
                for _ in range(WORKER_PROCESSES):
                    proc = Process(target=obtain_stream, \
                                   args=(self.task_queue, \
                                         self.done_queue))
                    # start() returns None, so it must not be chained onto
                    # the constructor: keep the Process object itself.
                    proc.start()
                    self.workers.append(proc)

            # Merge whatever the workers have finished so far.
            while not self.done_queue.empty():
                stream = self.done_queue.get()
                self.queue[stream['q']] = stream

            stream = self.queue[queue_index]
            if not stream.get('v') or not stream.get('a'):
                # Not resolved by a worker yet: resolve it now.
                logging.info("ytid : %s", stream['i'].ytid)
                video = stream.get('v')
                if not video:
                    video = pafy.new(stream['i'].ytid)
                audio = video.getbestaudio(preftype="webm")
                if not audio:
                    logging.info("no suitable audio found")
                    raise AttributeError()
                stream.update({'a': audio, 'v': video})

            self.now_playing_stream = stream
            return stream['a'].url.encode("utf-8")

        except AttributeError:
            logging.info("Could not retrieve the stream url!")
            raise

    def add_to_playback_queue(self, audio=None, video=None, info=None):
        """ Add to the playback queue.

        Prints the title (and extension) of ``audio`` and the title of
        ``info`` when they are given, then records the new entry twice: once
        on ``self.task_queue`` and once at the end of ``self.queue``. Each
        record is a separate dict with keys ``'a'``, ``'v'``, ``'i'`` and
        ``'q'``, where ``'q'`` is the entry's position in ``self.queue``.

        """
        if audio:
            audio_title = to_ascii(audio.title).encode("utf-8")
            audio_ext = to_ascii(audio.extension)
            print_nfo("[YouTube] [Stream] '{0}' [{1}]."
                      .format(audio_title, audio_ext))
        if info:
            info_title = to_ascii(info.title).encode("utf-8")
            print_nfo("[YouTube] [Stream] '{0}'.".format(info_title))

        position = len(self.queue)
        # Two independent dicts on purpose: one goes to the task queue, the
        # other stays in the local queue.
        self.task_queue.put({'a': audio, 'v': video, 'i': info, 'q': position})
        self.queue.append({'a': audio, 'v': video, 'i': info, 'q': position})
Пример #10
0
# Queue of work items consumed by the workers.
tasks = Queue()

# Pre-load nine two-character items: "00", "11", ..., "88".
for i in range(9):
    tasks.put("{0}{0}".format(i))

def main(name):
    """Drain the queue ``name``, printing one item per second.

    Each iteration sleeps for one second and then checks the queue. If it is
    empty, "name is over." is printed and the function returns. Otherwise one
    item is taken and printed together with the current thread's name.
    """
    while True:
        time.sleep(1)
        if not name.empty():
            worker_name = threading.current_thread().getName()
            print("线程:%s 打印:%s" % (worker_name, name.get()))
            continue
        print("name is over.")
        return

# Start four workers; ``main`` is handed to the pool together with the shared
# task queue as its argument.
pool = Pool(4, main, (tasks,))

# Check every five seconds (sleeping before each check) until the queue has
# been drained.
time.sleep(5)
while not tasks.empty():
    time.sleep(5)

print("tasks is over.")
# Shut the pool down.
pool.terminate()

print("main is over.")


Пример #11
0
class ParallelDownloader(URL_Fetcher):
    'Parallel threaded web page downloader'

    def __init__(self,
                 db_name,
                 proc_count,
                 site_base_url,
                 fUseCache=True,
                 fCacheSearchPages=True,
                 fUseCookies=False,
                 timeout=secHTTP_WAIT_TIMEOUT,
                 search_proc_count=2,
                 proxies=None):
        """Set up the queues and pools, then initialise the URL_Fetcher base.

        db_name, fUseCache, fUseCookies, timeout and proxies are forwarded
        unchanged to ``URL_Fetcher.__init__``.

        proc_count -- number of processes for ``self.pool``.
        site_base_url -- prefix that ``prefix_site_base_url`` puts in front
            of relative urls.
        fCacheSearchPages -- stored as ``self.fSaveSearchPages``; passed as
            ``fUseCache`` when search pages are fetched.
        search_proc_count -- number of processes for
            ``self.url_extract_pool``.
        """

        self.proxies = proxies
        # Urls of pages waiting to be downloaded.
        self.queue = Queue()
        self.fSaveSearchPages = fCacheSearchPages
        self.site_base_url = site_base_url
        self.pool = Pool(processes=proc_count)

        # Urls of search pages waiting to have their links extracted.
        self.search_queue = Queue()
        self.url_extract_pool = Pool(processes=search_proc_count)

        # The base class is initialised last, after the attributes above.
        URL_Fetcher.__init__(self,
                             db_name,
                             fUseCache,
                             fUseCookies,
                             timeout=timeout,
                             proxies=proxies)

    def process_urls_from_search_queue(self):
        """Extract page urls from every queued search page and queue them.

        For each url taken from ``self.search_queue`` the search page is
        fetched (cached according to ``self.fSaveSearchPages``), relative
        urls are extracted with ``self.url_extract_xpath``, prefixed with the
        site base url and put on ``self.queue``. When the search queue is
        drained, ``None`` is queued as the end-of-work marker and
        ``postprocess_search_page_list`` is called with the urls and page of
        the last search page processed.

        If the search queue was empty from the start, only the ``None``
        marker is queued and the post-processing hook is not called.

        NOTE(review): ``self.url_extract_xpath`` is not set in ``__init__``;
        presumably a subclass or caller provides it - confirm.
        """
        rel_urls = None
        search_page = None
        processed_any = False

        while not self.search_queue.empty():
            search_page_url = self.search_queue.get()
            logDbg('search page: %s' % search_page_url)

            search_page = self.get_page(search_page_url,
                                        fUseCache=self.fSaveSearchPages)
            rel_urls = extract_data_xpath(search_page, self.url_extract_xpath)

            logOut('%d urls extracted from [%s]. Queuing...' %
                   (len(rel_urls), search_page_url))
            logDbg('Extracted urls: %s. Queuing to download...' % rel_urls)
            for page_url in self.prefix_site_base_url(rel_urls):
                self.queue.put(page_url)
            processed_any = True

        # End-of-work marker for the consumer of self.queue.
        self.queue.put(None)

        # With no search page processed there is nothing to post-process;
        # previously this call raised UnboundLocalError because rel_urls and
        # search_page had never been assigned.
        if processed_any:
            self.postprocess_search_page_list(rel_urls, search_page)

    def queue_pages(self, url_list):
        """Put every url from ``url_list`` on ``self.queue``, then ``None``.

        The trailing ``None`` marks the end of the job queue.
        """
        for page_url in url_list:
            self.queue.put(page_url)

        # End-of-queue marker.
        self.queue.put(None)

    def postprocess_search_page_list(self, url, page):
        """Hook called after the search queue has been processed.

        Does nothing and returns None here.
        """
        return None

    def prefix_site_base_url(self, rel_urls):
        """Return a new list with ``self.site_base_url`` prepended to each url."""
        prefixed = []
        for rel_url in rel_urls:
            prefixed.append(self.site_base_url + rel_url)
        return prefixed

    def process_pages(self, page_processor, *add_processor_args):
        """Remember the page processor and run ``process_page`` via the pool.

        ``page_processor`` is stored as ``self.page_processor`` and the extra
        positional arguments as the tuple ``self.add_pprocessor_args``;
        ``self.process_page`` is then submitted with ``self.pool.apply``.
        """
        self.page_processor, self.add_pprocessor_args = (
            page_processor, add_processor_args)
        self.pool.apply(self.process_page)

    def process_page(self):
        """Take urls from ``self.queue``, download each and process the page.

        The loop ends at the first falsy item taken from the queue (such as
        the ``None`` end marker). For every other url the page is fetched
        with ``self.get_page`` and, if the result is truthy, handed to
        ``self.page_processor`` together with ``self.add_pprocessor_args``.
        """
        while True:
            url = self.queue.get()
            logDbg('Url got from queue: %s' % url)
            if not url:
                # End marker (or another falsy item): stop consuming.
                break

            page = self.get_page(url)  # proxies are not passed here

            # Only pages that were actually retrieved are processed.
            if page:
                self.page_processor(url, page, *self.add_pprocessor_args)