Пример #1
0
def process_play_list_step(message):
    """Handle a ``<playlist id>:<course title>`` chat message.

    Fetches the playlist items through the YouTube API, builds one ``Lesson``
    per video that can still be looked up, and hands the lessons to
    ``loginAndUpdate``. Progress and any error text are sent back to the chat
    through ``bot``.

    Args:
        message: incoming bot message; ``message.text`` and
            ``message.chat.id`` are read.
    """
    try:
        chat_id = message.chat.id
        text = message.text
        # Guard against a missing/empty text before the membership test.
        if not text or ":" not in text:
            bot.reply_to(message, 'Wrong message format')
            return

        data = str(text).split(':')
        plaid = data[0]
        title = data[1]

        if len(plaid) <= 5 or len(title) <= 5:
            bot.reply_to(message, 'Wrong message format')
            return

        api = Api(api_key=youtube_api_key)
        playlist_item_by_playlist = api.get_playlist_items(playlist_id=plaid,
                                                           count=1000)
        videos = playlist_item_by_playlist.items
        if not videos:
            bot.send_message(chat_id,
                             "No Vidoes found for playlist = " + str(title))
            return

        bot.send_message(chat_id,
                         "Found " + str(len(videos)) + " videos from youtube")

        lessons = []
        for video in videos:
            video_id = video.snippet.resourceId.videoId
            video_by_id = api.get_video_by_id(
                video_id=video_id,
                parts=('snippet', 'contentDetails', 'statistics'))
            if not video_by_id.items:
                # Lookup returned nothing for this id: skip it, it does not
                # consume a lesson number.
                continue
            item = video_by_id.items[0]
            # Build the lesson title in its own variable: reassigning
            # ``title`` here would make every lesson title accumulate the
            # titles of all previous lessons.
            lesson_title = title + " " + formatLessonName(
                len(lessons) + 1) + " " + item.snippet.title
            time_val = isodate.parse_duration(item.contentDetails.duration)
            code = getYoutubeEmbedCode(video_id)
            lessons.append(Lesson(lesson_title, code, time_val))

        bot.send_message(
            chat_id,
            "Total avalibale vidoe count = " + str(len(lessons)))
        if not lessons:
            bot.send_message(chat_id, "No lesson found!")
            # Nothing to upload, so do not start the login/update step.
            return
        loginAndUpdate(lessons, message, plaid)

    except Exception as e:
        # Top-level boundary of the handler: report the failure to the user.
        bot.reply_to(message, str(e))
Пример #2
0
def fetchYoutubeData():
    """Fetch channel info and recent uploads from YouTube and store both.

    The channel id and API key are read from the ``YOUTUBE_CHANNEL_ID`` and
    ``YOUTUBE_DATA_API_KEY`` environment variables. Two documents are saved
    through ``saveDataToMongoDB``: the channel data and the list of videos
    (which reuses the channel document's id as ``_id``).

    Returns:
        The value returned by ``saveDataToMongoDB`` for the channel document,
        or ``None`` when an exception occurred before it was stored.
    """
    processedChannelDataID = None

    try:
        successMessage('- Gathering youtube channel & video data...')

        api = Api(api_key=os.getenv('YOUTUBE_DATA_API_KEY'))
        channelById = api.get_channel_info(
            channel_id=os.getenv('YOUTUBE_CHANNEL_ID'))

        successMessage('- Fetched youtube channel & video data...')

        channel = channelById.items[0]
        uploadsPlaylistId = channel.contentDetails.relatedPlaylists.uploads
        allChannelVideos = api.get_playlist_items(
            playlist_id=uploadsPlaylistId, count=30, limit=30)
        successMessage('- Constructing youtube channel & video data...')

        processedData = [
            {
                "videoUrl": video.contentDetails.videoId,
                # The video's own title; ``channelTitle`` would repeat the
                # channel name for every entry.
                "videoTitle": video.snippet.title,
                "videoDescription": video.snippet.description,
            }
            for video in allChannelVideos.items
        ]

        successMessage('- Storing youtube video & channel data...')
        resetAt = round(time.time())
        # A channel without keywords yields an empty list instead of failing.
        keywords = channel.brandingSettings.channel.keywords or ""
        processedChannelDataID = saveDataToMongoDB(
            {
                "thumbnail": channel.snippet.thumbnails.high.url,
                "channelName": channel.snippet.title,
                "channelDescription": channel.snippet.description,
                "keywords": keywords.split(),
                "resetAt": resetAt
            }, "youtubeChannelData")
        saveDataToMongoDB(
            {
                "_id": processedChannelDataID,
                "channelName": channel.snippet.title,
                "videos": processedData,
                "resetAt": resetAt,
                "hasBeenProcessed": False
            }, "youtubeVideoData")
        successMessage('- Completed storing youtube video & channel data...')
    except Exception as exc:
        # Keep the step best-effort, but surface the cause instead of hiding
        # it (and do not swallow KeyboardInterrupt/SystemExit).
        errorMessage('- An exception occurred: ' + str(exc))
    else:
        successMessage('- Completed youtube data step... ')

    return processedChannelDataID
Пример #3
0
def random_video(mood):
    """Return a watch URL for a random video of the playlist mapped to *mood*.

    The playlist id is looked up in ``playlists_by_mood``; only playlist
    entries whose resource kind is ``youtube#video`` are candidates.
    """
    client = Api(api_key=apikey)

    entries = client.get_playlist_items(
        playlist_id=playlists_by_mood[mood], count=None).items
    video_ids = [
        entry.snippet.resourceId.videoId
        for entry in entries
        if entry.snippet.resourceId.kind == 'youtube#video'
    ]

    picked = video_ids[random.randrange(len(video_ids))]
    return 'https://www.youtube.com/watch?v=' + picked
Пример #4
0
def get_music_titles(playlistID):
    """Return the upper-cased titles of all items in a YouTube playlist.

    The resulting list is printed before it is returned.
    """
    client = Api(api_key=YOUTUBE_KEY)
    playlist = client.get_playlist_items(playlist_id=playlistID, count=None)

    list_music = [entry.snippet.title.upper() for entry in playlist.items]

    print(list_music)
    return list_music
Пример #5
0
def GetFridayFeature(ctx):
    """Build a Discord embed for a randomly picked video of a fixed playlist.

    The chosen index and the video title are printed. ``ctx`` is accepted but
    not used in the body.

    Returns:
        A ``discord.Embed`` with the video title, watch URL, a "Published"
        field and the standard thumbnail as image.
    """
    client = Api(api_key=API)
    items = client.get_playlist_items(
        playlist_id="PLLUkxbIkknLuK6BLdOs-QDAJiDQTM7Xei", count=None).items

    number = randrange(len(items))
    print(number)
    video = items[number]
    print(video.snippet.title)

    watch_url = "https://www.youtube.com/watch?v=" + video.contentDetails.videoId
    embedVar = discord.Embed(title=video.snippet.title,
                             url=watch_url,
                             color=0x0ed0f1)
    embedVar.add_field(name="Published",
                       value=video.contentDetails.videoPublishedAt)
    embedVar.set_image(url=video.snippet.thumbnails.standard.url)

    return embedVar
Пример #6
0
# Day boundaries in Japan time. A pytz zone has to be attached with
# localize(): passing it as tzinfo= to the datetime constructor makes pytz
# use the zone's earliest historical offset, which is not necessarily +09:00,
# and that would shift both ends of the range.
_tokyo_tz = pytz.timezone("Asia/Tokyo")
start_range = _tokyo_tz.localize(datetime.datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0))
end_range = _tokyo_tz.localize(datetime.datetime(end_date.year, end_date.month, end_date.day, 23, 59, 59))
date_str = start_date.strftime("%Y-%m-%d")

# newline='' lets the csv module control line endings (avoids blank rows on
# platforms that translate '\n'). The handle is kept open on purpose —
# presumably more rows are written further down, outside this excerpt.
csv_file = open('./stream_stats_' + date_str + '.csv', 'w', newline='')
w = csv.writer(csv_file, delimiter=',')
# Header row
w.writerow(['streamer', 'title', 'thumbnail', 'chat_users', 'eng_tl_msg_per_min', 'jp_tl_msg_per_min', 'not_jp_user_%', 'adj_kusa_per_min', 'humor_score', 'tete_per_usr', 'faq_count', 'marry_per_usr', 'most_kusa_tstamp', 'most_humor_tstamp', 'most_faq_tstamp', 'most_tete_tstamp'])
csv_file.flush()

def is_emoji(char):
    """Tell whether *char* is contained in ``emoji.UNICODE_EMOJI``."""
    known_emoji = emoji.UNICODE_EMOJI
    return char in known_emoji

# Fetch up to 50 playlist items per streamer name from the YouTube API.
# PLAYLIST_IDS is walked in step with NAMES through pl_idx, which must be
# initialised before this loop (not visible in this excerpt).
for name in NAMES:
    playlists[name] = []
    playlists[name]= api.get_playlist_items(playlist_id=PLAYLIST_IDS[pl_idx], count=50)
    pl_idx += 1

# Walk every fetched video and keep only those published inside the
# [start_range, end_range] window.
for key, val in playlists.items():
    for vid in val.items:
        pub_date = vid.contentDetails.videoPublishedAt
        # Parse the ISO-8601 publish timestamp and convert it to Japan time
        pub_dt = dateutil.parser.isoparse(pub_date).astimezone(pytz.timezone("Asia/Tokyo"))

        if (start_range <= pub_dt <= end_range):
            # Per-video counters and user lists, reset for each video in range
            kusa_count = 0
            tete_count = 0
            faq_count = 0
            marry_count = 0
            humor_count = 0
            all_users = []
            not_jp_user = []
Пример #7
0
import random

from pyyoutube import Api

# NOTE(review): API key is hard-coded in source — consider moving it to
# configuration and rotating it.
apikey = 'AIzaSyC053n6_uqpUiOd1X4YfD0Vkx1QcTL-0R8'
playlist = 'PL_MH8gOS_ETiNT1NF8B46JYHZe6fXWfVW'

api = Api(api_key=apikey)

# Collect the ids of all playlist entries that are actual videos.
playlist_item_by_playlist = api.get_playlist_items(playlist_id=playlist, count=None).items
videos = [
    item.snippet.resourceId.videoId
    for item in playlist_item_by_playlist
    if item.snippet.resourceId.kind == 'youtube#video'
]

# An empty playlist used to fail inside random.randint(0, -1) with an
# unrelated-looking "empty range" error; stop with a clear message instead.
if not videos:
    raise SystemExit('No videos found in playlist ' + playlist)

random_video = random.choice(videos)
print('https://www.youtube.com/watch?v='+ random_video)
Пример #8
0
#!/usr/bin/env python3

from pyyoutube import Api
import pytube
import urllib.request
from googleapiclient.discovery import build

# The same key is used for the pyyoutube client and the googleapiclient one.
api_key='AIzaSyBagf-_AQk4bISDaXIkyD0cGCPZCLHYHuU'
api = Api(api_key=api_key)
playListId = "PLt0cfLFa-ZYzZB54dKA6EV2McLY26hNGJ"
downLoadCount = 70
playlist_item = api.get_playlist_items(playlist_id=playListId, count = downLoadCount) #222

# Collect [video id, title] for every playlist entry
itemList = [
    [entry.snippet.resourceId.videoId, entry.snippet.title]
    for entry in playlist_item.items
]

# Download videos
youtube = build('youtube', 'v3', developerKey=api_key)

itemCounts = len(itemList)
print("==================================")
print(itemCounts," videos found.")
print("==================================")
count = 0
videoSubFolder = './Video'
# Print the watch URL of each entry, counting as we go
for vid, vtitle in itemList:
    count += 1
    itemurl = 'https://www.youtube.com/watch?v=' + vid
    print(itemurl)
def main(channel_name="YaleCourses", load_from_file=False):
    """Save the manually created English transcripts of a channel's videos.

    Video ids are gathered from every playlist of the channel (or re-read from
    the ids file written by an earlier run) and each available transcript is
    stored as ``<raw_dir>/transcripts/<channel_name>/<video id>.json``.

    The function refuses to run (prints a notice and returns) when the
    channel's transcript folder already exists.

    Args:
        channel_name: channel user name passed to ``get_channel_info``.
        load_from_file: when exactly ``False`` the ids are fetched from the
            API and written to the ids file; otherwise they are loaded from it.
    """
    # find all the videos

    # NOTE(review): API key is hard-coded in source — consider moving it to
    # configuration and rotating it.
    api = Api(api_key="AIzaSyCw0j0aCe0y_T42q3RLBoVtGXlTOMGGaSM")

    print("Setup dir to save the transcripts of %s channel" % (channel_name))
    channel_dir = os.path.join(raw_dir, "transcripts", channel_name)
    channel_id_file = os.path.join(raw_dir, "video_ids", channel_name + ".txt")

    if os.path.exists(channel_dir):
        print("\tThe folder of the channel %s is already exist\n"
              "\tdelete it before executing this script -"
              "we don't want to override your data" % (channel_name))
        return
    # makedirs (not mkdir): the parent "transcripts" folder may not exist yet.
    os.makedirs(channel_dir)

    # Since google is blocking after a while the retrival of the IDs,
    # we write the IDs to a file as a buffer for safety.
    if load_from_file is False:
        print("Retriving %s channel information" % (channel_name))
        channel_by_name = api.get_channel_info(channel_name=channel_name)
        print("\tFetch all the playlists")
        playlists_by_channel = api.get_playlists(
            channel_id=channel_by_name.items[0].id, count=None)
        print("\tFetch all the videos of the playlist")
        playlists_videos = []
        for playlist in playlists_by_channel.items:
            print("\t\tFetching videos IDs of playlist %s" % (playlist.id))
            playlists_videos.append(
                api.get_playlist_items(playlist_id=playlist.id, count=None))

        videos_ids = [
            video.snippet.resourceId.videoId
            for playlist in playlists_videos
            for video in playlist.items
        ]
        print("We gathered now %s videos, saving save to file" %
              (len(videos_ids)))
        # Make sure the "video_ids" folder exists before writing into it.
        os.makedirs(os.path.dirname(channel_id_file), exist_ok=True)
        with open(channel_id_file, 'w') as f:
            json.dump(videos_ids, f)
    else:
        with open(channel_id_file, 'r') as f:
            videos_ids = json.load(f)

    print("Save %s channel videos transcripts" % (channel_name))

    for video_id in videos_ids:
        print("The video ID is %s" % (video_id))
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            video_transcripts = None
            for transcript in transcript_list:
                # the Transcript object provides metadata properties
                print("Video id : ", transcript.video_id)
                print("\tlanguage : %s , language code : %s" %
                      (transcript.language, transcript.language_code))
                print("\tis_generated: %s, is_translatable: %s" %
                      (transcript.is_generated, transcript.is_translatable))
                # Keep only English transcripts that were not auto-generated;
                # if several match, the last one wins.
                if transcript.language_code == 'en' and transcript.is_generated is False:
                    video_transcripts = transcript.fetch()

            if video_transcripts is not None:
                video_path = os.path.join(channel_dir, video_id + ".json")
                with open(video_path, 'w') as outfile:
                    json.dump(video_transcripts, outfile)
        except Exception as e:
            # Best effort per video: report the problem and go on with the
            # next id.
            print(e)

    print("Finish main")
Пример #10
0
from pyyoutube import Api
import pytube
import urllib.request
import os
import time
from googleapiclient.discovery import build
# ---- Environment variable setting ----
APIKey = 'you api key'
PlayListId = 'The playlist id'
PlayListContentNums = 200  # The number of videos in this playlist
# ---- Environment variable setting ----

api = Api(api_key=APIKey)
playlist_item = api.get_playlist_items(playlist_id=PlayListId,
                                       count=PlayListContentNums)

# Collect [video id, title] for every playlist entry
itemList = [
    [entry.snippet.resourceId.videoId, entry.snippet.title]
    for entry in playlist_item.items
]

# Download videos and transfer to MP3
from moviepy.editor import *

itemCounts = len(itemList)
banner = "=================================="
print(banner)
print(itemCounts, " videos found.")
print(banner)
count = 0

Пример #11
0
class YTParser:
    """Fetch information from YouTube and transcribe downloaded audio.

    NOTE(review): files are addressed as ``os.path.join(self.path, ...)``
    after ``video2data`` has changed the working directory to ``self.path``,
    so ``audio_path`` presumably has to be an absolute path — confirm.
    """

    def __init__(self, api, audio_path, kaldi_path):
        """Create the parser.

        Args:
            api: API key passed to ``Api(api_key=...)``.
            audio_path: directory downloads are written to and read from.
            kaldi_path: path handed to ``Model`` for speech recognition.
        """
        self.api = Api(api_key=api)
        # Name of the last downloaded audio file; False until a download
        # has finished.
        self.filename = False
        self.path = audio_path
        self.kaldi_path = kaldi_path

    def url2id(self, url):
        """Return the part of a watch URL that follows ``watch?v=``."""
        return url.split('watch?v=')[1]

    def id2url(self, id):
        """Return the watch URL for a video id."""
        return 'https://www.youtube.com/watch?v=' + id

    def get_latest_videos_by_channel_link(self, url):
        """Get links to the latest videos from a channel's ``/user/`` link.

        Up to 100 items of the channel's uploads playlist are requested.

        Returns:
            A list of ``{'id': ..., 'url': ...}`` dicts, one per playlist
            entry whose resource kind is ``youtube#video``.
        """
        channel_name = url.split('/user/')[1]
        channel_by_id = self.api.get_channel_info(channel_name=channel_name)
        channel_info = channel_by_id.items[0].to_dict()
        uploads_plst = channel_info['contentDetails']['relatedPlaylists'][
            'uploads']
        items = self.api.get_playlist_items(playlist_id=uploads_plst,
                                            count=100)
        return [
            {
                'id': item.snippet.resourceId.videoId,
                'url': self.id2url(item.snippet.resourceId.videoId),
            }
            for item in items.items
            if item.snippet.resourceId.kind == 'youtube#video'
        ]

    def _catch_filename(self, d):
        """Progress hook: remember the ``.mp3`` name of a finished download."""
        if d['status'] == 'finished':
            self.filename = os.path.splitext(d['filename'])[0] + '.mp3'

    def _downloaded_data(self):
        """Return metadata of the downloaded video.

        Reads the ``.info.json`` file that sits next to the downloaded audio.

        Returns:
            ``False`` when no download has finished yet, otherwise a dict of
            selected metadata fields. Statistics that are missing from the
            info file are reported as ``None``.
        """
        if not self.filename:
            return False
        self.description_file = os.path.splitext(
            self.filename)[0] + '.info.json'
        with open(os.path.join(self.path, self.description_file)) as fp:
            description = json.load(fp)
        return {
            'id': description['id'],
            'uploader_url': description['uploader_url'],
            'channel_id': description['channel_id'],
            'channel_url': description['channel_url'],
            'upload_date':
            datetime.datetime.strptime(description['upload_date'], "%Y%m%d"),
            'title': description['title'],
            'description': description['description'],
            'webpage_url': description['webpage_url'],
            # Statistics may be absent from the info file; a missing counter
            # should not make the whole video unusable.
            'view_count': description.get('view_count'),
            'like_count': description.get('like_count'),
            'dislike_count': description.get('dislike_count'),
            'average_rating': description.get('average_rating'),
        }

    def video2data(self, url):
        """Get the recognized text of a video by its url.

        Downloads the audio as mp3, feeds it through ffmpeg into the Kaldi
        recognizer, then deletes the downloaded audio and info files.

        Returns:
            A ``(full_text, video_description)`` tuple: the recognized text
            and the metadata dict from ``_downloaded_data``.

        Raises:
            RuntimeError: if the download did not produce an audio file.
        """
        sample_rate = 16_000
        current_dir = os.getcwd()
        # Forget the file of a previous call so a failed download cannot be
        # mistaken for a successful one.
        self.filename = False
        os.chdir(self.path)
        try:
            ydl_opts = {
                'format': 'bestaudio/best',
                'writeinfojson': 'info',
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': '192',
                }],
                'progress_hooks': [self._catch_filename],
            }
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])

            time.sleep(20)
            video_description = self._downloaded_data()
            if video_description is False:
                raise RuntimeError(
                    'Download of %s did not produce an audio file' % url)

            model = Model(self.kaldi_path)
            rec = KaldiRecognizer(model, sample_rate)

            full_text = ''
            # The context manager closes the pipe and waits for ffmpeg, so no
            # child process is left behind.
            with subprocess.Popen([
                    'ffmpeg', '-loglevel', 'quiet', '-i',
                    os.path.join(self.path, self.filename), '-ar',
                    str(sample_rate), '-ac', '1', '-f', 's16le', '-'
            ], stdout=subprocess.PIPE) as process:
                while True:
                    data = process.stdout.read(4000)
                    if not data:
                        break
                    if rec.AcceptWaveform(data):
                        res = json.loads(rec.Result())
                        full_text += ' ' + res['text']
            full_text += ' ' + json.loads(rec.FinalResult())['text']

            os.remove(os.path.join(self.path, self.description_file))
            os.remove(os.path.join(self.path, self.filename))
        finally:
            # Always restore the caller's working directory, even on errors.
            os.chdir(current_dir)
        return full_text, video_description
Пример #12
0
    countryCode = MyConfigurations.COUNTRY_CODE
    playlistIDs = MyConfigurations.PLAYLIST_IDS
    displayUploader = MyConfigurations.DISPLAY_UPLOADER

    # Create this folder from the project root
    if not isdir('output'):
        mkdir('output')

    # Then create another nested folder. Example: output/output-2020-09-14T173551
    outputFolderPath = join(
        'output', 'output-' + strftime('%Y-%m-%dT%H%M%S', localtime()))
    mkdir(outputFolderPath)

    # Remember to comment out the individual playlist keys from MyConfigurations if you don't want to loop through them all
    for key in playlistIDs:
        playlistVideoItems = api.get_playlist_items(
            playlist_id=playlistIDs[key], count=None).items

        currentTimestamp = strftime(' %Y-%m-%dT%H%M%S', localtime())
        outputFileName = key + currentTimestamp + '.out'
        outputFilePath = join(outputFolderPath, outputFileName)

        with open(outputFilePath, 'w', encoding='utf8') as outfile:
            for index, playlistVideo in enumerate(playlistVideoItems):

                try:
                    video = api.get_video_by_id(
                        video_id=playlistVideo.contentDetails.videoId).items[0]

                    if not isVideoBlocked(countryCode, video):
                        outfile.write(video.snippet.title + '\n')
                        if displayUploader:
Пример #13
0
class EdumaBot():
    """Selenium bot that publishes the videos of YouTube playlists as lessons.

    Playlists are read from the local ``playlist`` file; every video becomes
    one lesson that is typed into the site's "new lesson" form.
    """

    def __init__(self):
        """Start a Chrome driver and create the YouTube API and DB helpers."""
        self.driver = webdriver.Chrome()
        self.api = Api(api_key=apikey)
        self.db = DBHelper()

    def login(self):
        """Log in at ``url`` and navigate to the form for a new lesson."""
        self.driver.get(url)

        time.sleep(1)
        login_btn = self.driver.find_element_by_xpath('//*[@id="wp-submit"]')
        email_field = self.driver.find_element_by_xpath(
            '//*[@id="user_login"]')
        pass_field = self.driver.find_element_by_xpath('//*[@id="user_pass"]')

        email_field.send_keys(username)
        time.sleep(1)
        pass_field.send_keys(password)
        login_btn.click()
        self.driver.find_element_by_xpath(
            '//*[@id="toplevel_page_learn_press"]').click()
        self.driver.find_element_by_xpath(
            '//*[@id="toplevel_page_learn_press"]/ul/li[3]/a').click()
        self.driver.find_element_by_xpath(
            '//*[@id="wpbody-content"]/div[3]/a').click()

    def start(self):
        """Log in and publish a lesson for every video of every new course.

        Courses whose playlist is already reported by
        ``self.db.getFinishedPlayLists`` are skipped, as are playlists
        without videos and videos that can no longer be looked up.
        """
        self.login()
        courses = self._read_courses("playlist")

        if len(courses) == 0:
            print("no lesson found on file =  " + "playlist")
            return

        print("course found on file " + str(len(courses)))

        for course in courses:

            finsihed = self.db.getFinishedPlayLists(
                playlist_id=course.playlistId)
            print(finsihed)
            if finsihed.rowcount != -1:
                print("course already exist titile =  " + course.title)
                continue

            playlist_item_by_playlist = self.api.get_playlist_items(
                playlist_id=course.playlistId, count=1000)
            videos = playlist_item_by_playlist.items
            print("number of vidoes in playlist = " + str(len(videos)))
            if len(videos) <= 0:
                print("no videos found for  playlist = " +
                      str(course.playlistId) + "\n")
                continue

            # The lesson number is the 1-based position in the playlist, so
            # skipped videos leave a gap in the numbering.
            for count, video in enumerate(videos, start=1):
                video_id = video.snippet.resourceId.videoId
                video_by_id = self.api.get_video_by_id(
                    video_id=video_id,
                    parts=('snippet', 'contentDetails', 'statistics'))
                if len(video_by_id.items) <= 0:
                    print("missed or private vidoe for  = " +
                          str(course.playlistId) + "\n")
                    continue

                item = video_by_id.items[0]
                title = course.title + " " + self.formatLessonName(
                    count) + " " + item.snippet.title

                time_val = isodate.parse_duration(item.contentDetails.duration)
                code = self.getYoutubeEmbedCode(video_id)
                # Pass the parsed duration; the original passed the ``time``
                # module here by mistake.
                lesson = Lesson(title, code, time_val)

                self._publish_lesson(lesson, time_val)

        # NOTE(review): resuming partially added playlists and recording
        # finished ones (self.db.*) was disabled in the original code.

    def _read_courses(self, path):
        """Parse the playlist file into ``Course`` objects.

        Each usable line has the form ``<playlist id>[|]<title>``; lines that
        do not split into exactly two parts are ignored.
        NOTE(review): ``str.split`` treats "[|]" as a literal separator, not
        as a regex for "|" — confirm this matches the file format.
        """
        courses = []
        # "r+" keeps the original requirement that the file is writable.
        with open(path, "r+") as f:
            for line in f:
                vals = line.rstrip('\n').split("[|]")
                if len(vals) == 2:
                    courses.append(
                        Course(title=str(vals[1]),
                               playlistId=str(vals[0]),
                               start=0))
        return courses

    @staticmethod
    def _duration_field(time_val):
        """Map a duration to the ``(unit, value)`` pair typed into the form.

        The duration's string form is split on ":"; a non-zero first part is
        reported in hours, else a non-zero second part in minutes, else
        1 minute.
        """
        time_array = str(time_val).split(":")

        print(time_array[0] + " = hour")
        print(time_array[1] + " = minties")

        if time_array[0] not in ("0", "00"):
            return 'hour', time_array[0]
        if time_array[1] not in ("0", "00"):
            return 'minute', time_array[1]
        return 'minute', 1

    def _publish_lesson(self, lesson, time_val):
        """Fill in the lesson form, publish it and open a fresh form."""
        title_field = self.driver.find_element_by_xpath(
            '//*[@id="title"]')
        media_field = self.driver.find_element_by_xpath(
            '//*[@id="_lp_lesson_video_intro"]')
        date_field = self.driver.find_element_by_xpath(
            '//*[@id="_lp_duration"]')
        publish_button = self.driver.find_element_by_xpath(
            '//*[@id="publish"]')
        select = Select(
            self.driver.find_element_by_id('_lp_duration_select'))

        unit, final_time = self._duration_field(time_val)
        select.select_by_value(unit)

        title_field.clear()
        title_field.send_keys(lesson.title)
        time.sleep(1)
        media_field.clear()
        media_field.send_keys(lesson.code)
        time.sleep(1)
        date_field.clear()
        date_field.send_keys(final_time)
        time.sleep(2)
        # Click through JavaScript rather than WebElement.click()
        self.driver.execute_script("arguments[0].click();",
                                   publish_button)
        self.driver.implicitly_wait(10)
        time.sleep(2)
        # Open the "add new" form again for the next lesson
        self.driver.find_element_by_xpath(
            '//*[@id="wpbody-content"]/div[3]/a').click()
        self.driver.implicitly_wait(10)

    def formatLessonName(self, count):
        """Return *count* as text, left-padded with zeros to three digits."""
        if count <= 9:
            return "00" + str(count)
        if count <= 99:
            return "0" + str(count)
        return str(count)

    def getYoutubeEmbedCode(self, videoId):
        """Return the embed code for a video id, sized by the module settings."""
        embedder = Embedder()
        code = embedder("https://www.youtube.com/watch?v=" + str(videoId),
                        width=video_width,
                        height=video_height)
        return code