def scribe(video_id: str, youtube_api_key: str):
    """Fetch a video's metadata, preview image and transcript, then print a markdown page.

    Args:
        video_id: YouTube video ID.
        youtube_api_key: YouTube Data API key; exits the process with status 1 when empty.
    """
    if not youtube_api_key:
        # BUG FIX: flag name was misspelled '--youtube-api0key'.
        log.error(
            'You need to provide an API key either by --youtube-api-key or by setting YOUTUBE_API_KEY'
        )
        sys.exit(1)
    api = Api(api_key=youtube_api_key)
    # A single-ID lookup returns exactly one item; take it.
    video_metadata = api.get_video_by_id(video_id=video_id).items[0]
    title = video_metadata.snippet.title
    preview_image_path = get_preview_image(
        img_url=video_metadata.snippet.thumbnails.default.url,
        video_id=video_id)
    description = video_metadata.snippet.description
    # publishedAt is an RFC 3339 timestamp with a numeric UTC offset.
    date = datetime.datetime.strptime(video_metadata.snippet.publishedAt,
                                      "%Y-%m-%dT%H:%M:%S%z")
    captions = YouTubeTranscriptApi.get_transcript(video_id)
    print(
        gen_markdown_page(video_id=video_id,
                          title=title,
                          image_path=preview_image_path,
                          description=description,
                          date=date,
                          captions=captions))
def get_all_comments(YOUTUBE_API_KEY,
                     query,
                     count_video=10,
                     limit=30,
                     maxResults=10,
                     nextPageToken=''):
    """Fetch up to ``maxResults`` comments per video for videos matching ``query``.

    Searches for ``count_video`` videos by keyword, then collects the comment
    texts of each video into one flat list.

    Args:
        YOUTUBE_API_KEY: YouTube Data API key.
        query: keyword query used to find videos.
        count_video: number of videos to search for.
        limit: page size for the keyword search.
        maxResults: comments fetched per video.
        nextPageToken: pagination token forwarded to ``get_data``.

    Returns:
        Flat list of comment texts across all found videos.
    """
    api = Api(api_key=YOUTUBE_API_KEY)
    video_by_keywords = api.search_by_keywords(q=query,
                                               search_type=["video"],
                                               count=count_video,
                                               limit=limit)
    video_ids = [x.id.videoId for x in video_by_keywords.items]
    comments_all = []
    for id_video in video_ids:
        try:
            data = get_data(YOUTUBE_API_KEY,
                            id_video,
                            maxResults=maxResults,
                            nextPageToken=nextPageToken)
            comments_all.append(list(get_text_of_comment(data)))
        except Exception:
            # BUG FIX: was a bare `except:`. Best effort: skip videos whose
            # comments cannot be fetched (comments disabled, quota errors, ...).
            continue
    # Flatten without the quadratic sum(..., []) idiom.
    return [comment for chunk in comments_all for comment in chunk]
def get_channel_info(username):
    """Return profile data for a YouTube channel, or an error string when not found.

    Args:
        username: channel name to look up.

    Returns:
        dict with name, creation timestamp, avatar URL, stats, extracted e-mail
        addresses and canonical URL — or the string 'User doesnt exist'.
    """
    # NOTE(review): the API key is hard-coded as 'redacted'; load it from
    # configuration before this can work.
    api = Api(api_key='redacted')
    channel_by_name = api.get_channel_info(channel_name=username)
    try:
        response = channel_by_name.items[0].to_dict()
    except (IndexError, TypeError):
        # BUG FIX: was a bare `except:`. items is empty (IndexError) or None
        # (TypeError) when the channel does not exist.
        return 'User doesnt exist'
    channel_name = response["snippet"]["localized"]["title"]
    created_at = response["snippet"]["publishedAt"]
    pfp_url = response["snippet"]["thumbnails"]["default"]["url"]
    view_count = response["statistics"]["viewCount"]
    subscribers = response["statistics"]["subscriberCount"]
    video_count = response["statistics"]["videoCount"]
    userid = response["id"]
    description = response["brandingSettings"]["channel"]["description"]
    # Scrape e-mail addresses out of the channel description.
    match = re.findall(r'[\w\.-]+@[\w\.-]+', str(description))
    if match:
        emails = ','.join(match)
    else:
        emails = "None found"
    data = {
        "name": channel_name,
        "created_at": parser.parse(created_at).timestamp(),
        "pfp_url": pfp_url,
        "total_views": view_count,
        # NOTE(review): key kept misspelled ('subsribers') for backward
        # compatibility with existing consumers of this dict.
        "subsribers": subscribers,
        "video_count": video_count,
        "userid": userid,
        "emails": emails,
        "url": f'https://www.youtube.com/channel/{userid}'
    }
    return data
def get_amount_of_videos_uploaded():
    """
    :return: Returns the amount of videos vakantie vincent has uploaded.
    """
    api = Api(api_key=get_youtube_api_key())
    channel = api.get_channel_info(channel_id=channel_id)
    # The lookup returns a single channel; read its statistics block.
    stats = channel.items[0].to_dict()["statistics"]
    return stats["videoCount"]
def get_latest_video(self, youtubeChannelName):
    """Return the newest upload of the first channel matching *youtubeChannelName*.

    Searches channels by name, then queries the Data API search endpoint
    ordered by date and returns the newest item wrapped in a JSON response.
    """
    res = {}
    # NOTE(review): hard-coded API key; should be loaded from configuration.
    apiKey = 'AIzaSyBOCLFDDz4wHFmatH-fPxsjjRnBfPzcOFQ'
    try:
        api = YoutubeApi(api_key=apiKey)
        r = api.search_by_keywords(q=youtubeChannelName,
                                   search_type=["channel"],
                                   count=2,
                                   limit=2)
        idChannel = r.items[0].snippet.channelId
        link = 'https://www.googleapis.com/youtube/v3/search?key=' + apiKey + \
               '&channelId=' + idChannel + '&part=snippet,id&order=date&maxResults=2'
        resp = requests.get(url=link)
        data = resp.json()
        if (data):
            res['result'] = data['items'][0]
        else:
            res['result'] = {
                'error': 'Youtube url not updated',
                'msg': 'Channel not found.'
            }
        return jsonify(res)
    except Exception as e:
        # BUG FIX: error label said "get_channel_ID" (copy-paste from the
        # sibling handler); corrected to name this function.
        print("error : get_latest_video\n", str(e), flush=True)
        res['error'] = "internal error"
        res['message'] = 'Youtube Data API exceded'
        return res
def __init__(self, API_KEY, JSON_PATH):
    """Build the YouTube API client and load the JSON database from disk."""
    self.api = Api(api_key=API_KEY)
    self.data_path = JSON_PATH
    with open(JSON_PATH) as handle:
        self.database = json.load(handle)
def get_number_subscribers_youtube_channel(self, youtubeChannelName):
    """Look up a channel by name and report its subscriber count as JSON."""
    res = {}
    apiKey = 'AIzaSyBOCLFDDz4wHFmatH-fPxsjjRnBfPzcOFQ'
    try:
        client = YoutubeApi(api_key=apiKey)
        search = client.search_by_keywords(q=youtubeChannelName,
                                           search_type=["channel"],
                                           count=2,
                                           limit=2)
        channel_id = search.items[0].snippet.channelId
        channel = client.get_channel_info(channel_id=channel_id)
        if not channel.items:
            # Channel vanished between search and lookup.
            res['result'] = {
                'error': 'Youtube url not updated',
                'msg': 'Channel not found.'
            }
        else:
            res['result'] = {
                'subscriberCount': channel.items[0].statistics.subscriberCount,
                'channel': channel.items[0].snippet
            }
        return jsonify(res)
    except Exception as e:
        print("error : get_number_subscribers_youtube_channel\n",
              str(e),
              flush=True)
        res['error'] = "internal error"
        res['message'] = 'Youtube Data API exceded'
        return res
def youtube(request):
    """Render channel search results for the query stored in the session.

    Reads 'user-input' from the session, searches YouTube channels, and
    renders 'youtube.html' with a list of {channel_name, channel_url,
    channel_logo} dicts.
    """
    # NOTE(review): hard-coded API key; should come from settings/env.
    api = Api(api_key="AIzaSyDHAS3sDLVtUqM1vx-kxykrBHMVSi0BLJI")
    query = request.session['user-input']
    res = api.search_by_keywords(q=query,
                                 search_type=["channel"],
                                 count=25,
                                 limit=8)
    res = res.to_dict()
    result = []
    for data in res["items"]:
        result.append({
            "channel_name": data["snippet"]["title"],
            # BUG FIX: URL scheme was missing a slash ("https:/www...").
            "channel_url": "https://www.youtube.com/channel/" +
                           str(data["snippet"]["channelId"]),
            "channel_logo": data["snippet"]["thumbnails"]["default"]["url"]
        })
    context = {"result": result, "text": query}
    return render(request, 'youtube.html', context)
class YTSearch():
    """Search YouTube and pick the best-rated video for an expected duration class."""

    def __init__(self, api_key=API_KEY):
        self.key = api_key
        self.api = Api(api_key=self.key)

    def check_video_eligibility(self, ids, expected_type):
        """Return the best-liked eligible video among *ids*.

        A video is eligible when its duration falls inside the
        ``Duration[expected_type]`` (min, max] window and it is not published
        by the 'YouTube Movies' channel. Returns a dict with yt_id,
        youtube_rating and, when a match was found, duration/likes/definition.
        """
        like_max = {"yt_id": "", "youtube_rating": 0.0}
        for video_id in ids:  # renamed from `id`, which shadowed the builtin
            video = list(
                self.api.get_video_by_id(video_id=video_id).to_dict().items())
            try:
                # NOTE(review): index [5][1][0] assumes a fixed key order in
                # the to_dict() result ('items' being the 6th key) — fragile;
                # confirm against the pyyoutube model.
                vid = video[5][1][0]
                duration = isodate.parse_duration(
                    vid['contentDetails']['duration']).total_seconds()
                definition = vid['contentDetails']['definition']
                like_count = float(vid["statistics"]["likeCount"])
                dislike_count = float(vid["statistics"]["dislikeCount"])
                duration_min = Duration[expected_type][0]
                duration_max = Duration[expected_type][1]
                total_votes = like_count + dislike_count
                if total_votes == 0:
                    # BUG FIX: avoid ZeroDivisionError (previously masked by
                    # the broad except) when a video has no votes.
                    continue
                like_percentage = like_count / total_votes
                channel_title = vid["snippet"]["channelTitle"]
                if like_percentage > like_max["youtube_rating"] and \
                        (duration_min < duration <= duration_max) and \
                        channel_title != 'YouTube Movies':
                    like_max["yt_id"] = video_id
                    like_max["yt_duration"] = duration
                    like_max["yt_likes"] = like_count
                    like_max["youtube_rating"] = like_percentage
                    like_max["yt_definition"] = definition
            except Exception:
                # Best effort: videos missing statistics/contentDetails fields
                # are simply skipped.
                print("ignoring the error")
                continue
        return like_max

    def obsolete_get_stats(self, key_word, expected_type="movie",
                           max_results=10):
        """Deprecated: search via the Data API and print the best match."""
        videos = list(
            self.api.search_by_keywords(q=key_word,
                                        search_type=["video"]).items)
        vids = [vid.to_dict()["id"]['videoId'] for vid in videos]
        ret = self.check_video_eligibility(vids, expected_type)
        print(ret)

    def get_youtube_stats(self, key_word, expected_type="movie",
                          max_results=10):
        """Search with YoutubeSearch, pick the best eligible video, attach its link."""
        get_mov_stats = YoutubeSearch(key_word,
                                      max_results=max_results).to_dict()
        ids = [d['id'] for d in get_mov_stats]
        ret = self.check_video_eligibility(ids, expected_type)
        for mov in get_mov_stats:
            if mov["id"] == ret["yt_id"]:
                ret["youtube_link"] = "https://youtube.com" + mov["link"]
        return ret
def process_play_list_step(message):
    """Telegram handler: import a YouTube playlist ('<playlist_id>:<title>') as lessons.

    Parses the message, fetches all playlist videos, builds one Lesson per
    video (numbered title, embed code, duration) and hands them to
    loginAndUpdate. All failures are reported back to the user.
    """
    try:
        text = message.text
        if ":" not in text:
            bot.reply_to(message, 'Wrong message format')
            return
        data = str(text).split(':')
        plaid = data[0]
        title = data[1]
        if len(plaid) <= 5 or len(title) <= 5:
            bot.reply_to(message, 'Wrong message format')
            return
        api = Api(api_key=youtube_api_key)
        playlist_item_by_playlist = api.get_playlist_items(playlist_id=plaid,
                                                           count=1000)
        videos = playlist_item_by_playlist.items
        if len(videos) <= 0:
            bot.send_message(message.chat.id,
                             "No Vidoes found for playlist = " + str(title))
            return
        bot.send_message(message.chat.id,
                         "Found " + str(len(videos)) + " videos from youtube")
        real_video_count = len(videos)
        count = 0
        lessons = []
        for video in videos:
            video_by_id = api.get_video_by_id(
                video_id=video.snippet.resourceId.videoId,
                parts=('snippet', 'contentDetails', 'statistics'))
            if len(video_by_id.items) <= 0:
                # Video is private/deleted: not counted as available.
                real_video_count = real_video_count - 1
                continue
            count = count + 1
            item = video_by_id.items[0]
            # BUG FIX: the original re-assigned `title` here, so every lesson
            # title accumulated all previous lesson titles.
            lesson_title = title + " " + formatLessonName(
                count) + " " + item.snippet.title
            time_val = isodate.parse_duration(item.contentDetails.duration)
            code = getYoutubeEmbedCode(video.snippet.resourceId.videoId)
            lessons.append(Lesson(lesson_title, code, time_val))
        bot.send_message(
            message.chat.id,
            "Total avalibale vidoe count = " + str(real_video_count))
        if len(lessons) <= 0:
            bot.send_message(message.chat.id, "No lesson found!")
            # BUG FIX: stop here instead of calling loginAndUpdate with an
            # empty lesson list.
            return
        loginAndUpdate(lessons, message, plaid)
    except Exception as e:
        bot.reply_to(message, str(e))
def fetchYoutubeData():
    """Fetch channel + upload-playlist data from YouTube and persist it to MongoDB.

    Returns the MongoDB id of the stored channel document, or None when any
    step failed.
    """
    processedChannelDataID = None
    try:
        successMessage('- Gathering youtube channel & video data...')
        api = Api(api_key=os.getenv('YOUTUBE_DATA_API_KEY'))
        channelById = api.get_channel_info(
            channel_id=os.getenv('YOUTUBE_CHANNEL_ID'))
        successMessage('- Fetched youtube channel & video data...')
        # The 'uploads' related playlist contains every public upload.
        uploadsPlaylistId = channelById.items[
            0].contentDetails.relatedPlaylists.uploads
        allChannelVideos = api.get_playlist_items(
            playlist_id=uploadsPlaylistId, count=30, limit=30)
        successMessage('- Constructing youtube channel & video data...')
        processedData = []
        for video in allChannelVideos.items:
            processedData.append({
                "videoUrl": video.contentDetails.videoId,
                # BUG FIX: originally stored snippet.channelTitle (the channel
                # name) under "videoTitle"; use the video's own title.
                "videoTitle": video.snippet.title,
                "videoDescription": video.snippet.description,
            })
        successMessage('- Storing youtube video & channel data...')
        processedChannelDataID = saveDataToMongoDB(
            {
                "thumbnail": channelById.items[0].snippet.thumbnails.high.url,
                "channelName": channelById.items[0].snippet.title,
                "channelDescription":
                channelById.items[0].snippet.description,
                "keywords":
                channelById.items[0].brandingSettings.channel.keywords.split(),
                "resetAt": round(time.time())
            }, "youtubeChannelData")
        saveDataToMongoDB(
            {
                "_id": processedChannelDataID,
                "channelName": channelById.items[0].snippet.title,
                "videos": processedData,
                "resetAt": round(time.time()),
                "hasBeenProcessed": False
            }, "youtubeVideoData")
        successMessage('- Completed storing youtube video & channel data...')
    except Exception:
        # BUG FIX: was a bare `except:` which also swallowed SystemExit etc.
        errorMessage('- An exception occurred')
    else:
        successMessage('- Completed youtube data step... ')
    return processedChannelDataID
def get_youtube_statistics(video_ids):
    """Fetch YouTube statistics items for the given video id(s).

    Returns the API result's ``items`` list, or None when the lookup failed
    (the failure is recorded in the frappe error log).
    """
    api_key = frappe.db.get_single_value("Video Settings", "api_key")
    client = Api(api_key=api_key)
    try:
        return client.get_video_by_id(video_id=video_ids).items
    except Exception:
        title = "Failed to Update YouTube Statistics"
        frappe.log_error(title + "\n\n" + frappe.get_traceback(), title=title)
def random_video(mood):
    """Return the watch URL of a random video from the playlist mapped to *mood*.

    Raises KeyError for an unknown mood and IndexError when the playlist has
    no video entries (matching the original randint-based indexing).
    """
    api = Api(api_key=apikey)
    items = api.get_playlist_items(playlist_id=playlists_by_mood[mood],
                                   count=None).items
    # Keep only real video entries (playlists may contain other kinds).
    videos = [
        item.snippet.resourceId.videoId for item in items
        if item.snippet.resourceId.kind == 'youtube#video'
    ]
    # random.choice replaces manual randint(0, len-1) indexing.
    return 'https://www.youtube.com/watch?v=' + random.choice(videos)
async def find_first_youtube_match(keyword: str):
    """Return the watch URL of the first YouTube video matching *keyword*,
    or a fallback message when nothing was found."""
    client = Api(api_key=os.environ['YOUTUBE_TOKEN'])
    hits = client.search_by_keywords(
        q=keyword,
        search_type=('video', ),
        count=1,
    ).items
    if not hits:
        return 'I have found nothing'
    return f'https://www.youtube.com/watch?v={hits[0].id.videoId}'
def get_music_titles(playlistID):
    """Return the upper-cased titles of all videos on the given YouTube playlist."""
    api = Api(api_key=YOUTUBE_KEY)
    playlist = api.get_playlist_items(playlist_id=playlistID, count=None)
    list_music = [entry.snippet.title.upper() for entry in playlist.items]
    print(list_music)
    return list_music
def set_youtube_statistics(self):
    """Refresh this document's like/view/dislike/comment counts from YouTube.

    Failures are logged via frappe.log_error instead of raised.
    """
    api_key = frappe.db.get_single_value("Video Settings", "api_key")
    client = Api(api_key=api_key)
    try:
        found = client.get_video_by_id(video_id=self.youtube_video_id)
        stats = found.items[0].to_dict().get('statistics')
        self.like_count = stats.get('likeCount')
        self.view_count = stats.get('viewCount')
        self.dislike_count = stats.get('dislikeCount')
        self.comment_count = stats.get('commentCount')
    except Exception:
        title = "Failed to Update YouTube Statistics for Video: {0}".format(
            self.name)
        frappe.log_error(title + "\n\n" + frappe.get_traceback(), title=title)
def GetFridayFeature(ctx):
    """Pick a random video from the Friday Feature playlist and return a Discord embed."""
    api = Api(api_key=API)
    playlist = api.get_playlist_items(
        playlist_id="PLLUkxbIkknLuK6BLdOs-QDAJiDQTM7Xei", count=None)
    number = randrange(len(playlist.items))
    print(number)
    video = playlist.items[number]
    print(video.snippet.title)
    embedVar = discord.Embed(title=video.snippet.title,
                             url="https://www.youtube.com/watch?v=" +
                             video.contentDetails.videoId,
                             color=0x0ed0f1)
    embedVar.add_field(name="Published",
                       value=video.contentDetails.videoPublishedAt)
    embedVar.set_image(url=video.snippet.thumbnails.standard.url)
    return embedVar
def get_channel_ID(self, youtubeChannelName):
    """Resolve a channel name to its channel id and return it as JSON."""
    res = {}
    apiKey = 'AIzaSyBOCLFDDz4wHFmatH-fPxsjjRnBfPzcOFQ'
    try:
        client = YoutubeApi(api_key=apiKey)
        search = client.search_by_keywords(q=youtubeChannelName,
                                           search_type=["channel"],
                                           count=2,
                                           limit=2)
        channel_id = search.items[0].snippet.channelId
        if not channel_id:
            res['result'] = {
                'error': 'Youtube url not updated',
                'msg': 'Channel not found.'
            }
        else:
            res['result'] = channel_id
        return jsonify(res)
    except Exception as e:
        print("error : get_channel_ID\n", str(e), flush=True)
        res['error'] = "internal error"
        res['message'] = 'Youtube Data API exceded'
        return res
def main():
    """Scan recent comment threads on the channel for impersonator ('faker') replies
    and print a link to each affected comment that has not yet been notified."""
    api = Api(
        api_key=API_KEY,
        client_secret=CLIENT_SECRET,
        client_id=CLIENT_ID,
    )
    # Fetch the latest comment threads addressed to the channel.
    comments = api.get_comment_threads(all_to_channel_id=CHANNEL_ID,
                                       count=SEARCH_AMOUNT).items
    comments_with_replies = [
        comment for comment in comments if comment.replies
    ]
    for comment in comments_with_replies:
        is_faker = False
        has_already_been_informed = False
        # Check if there is a fake account in a reply
        for reply in comment.replies.comments:
            reply_name = reply.snippet.authorDisplayName
            reply_id = reply.snippet.authorChannelId
            if reply_name.lower() in FAKE_NAMES:
                # Check if it's a faker: a fake account mimics the display
                # name but cannot share the real channel's id.
                if reply_id.value != CHANNEL_ID:
                    # Faker found!
                    is_faker = True
            # NOTE(review): indentation reconstructed from mangled source —
            # this check is placed at loop level, i.e. ANY reply equal to
            # NOTIFY_MESSAGE marks the thread as handled; confirm against
            # the original file.
            if reply.snippet.textOriginal == NOTIFY_MESSAGE:
                has_already_been_informed = True
        if is_faker and not has_already_been_informed:
            video_id = comment.snippet.videoId
            comment_id = comment.id
            url = build_comment_link(video_id, comment_id)
            print(f"Faker found on video {url}")
def get_audio(url=None,
              file_name=None,
              index_file=None,
              since=None,
              limit=None,
              prefix_name=None,
              prefix_num=None):
    """Download audio either in batch (from an index file) or for one video URL.

    An existing ``index_file`` takes precedence over ``url``. Returns a
    user-facing error string when neither yields a usable video id.
    """
    api = Api(api_key=conf['api_key'])
    # Batch mode: an existing index file wins over a single URL.
    if index_file and os.path.exists(index_file):
        return get_audios_from_indexed_list(api, store_dir, index_file, since,
                                            limit, prefix_name, prefix_num)
    video_id = get_video_id(url)
    if not video_id:
        return 'Please provide video ID or full URL'
    return get_audio_by_id(api, store_dir, video_id, file_name)
def youtube_data(group):
    """Runs all the YouTube related tasks

    It scrapes data from YouTube for the whole group and the single artists

    Args:
        group: dictionary with the data of the group to scrape

    Returns:
        the same group dictionary with updated data
    """
    print("[{}] Starting tasks...".format(module))
    api = Api(api_key=youtube_api_key)

    # Channel data and stats for the group itself.
    channel_data = youtube_get_channel(api, group["youtube"]["url"])
    group["youtube"] = youtube_check_channel_change(group["youtube"],
                                                    channel_data,
                                                    group["hashtags"])

    # Video data and stats for the group itself.
    videos = youtube_get_videos(api, group["youtube"]["playlist"],
                                group["youtube"]["name"])
    group["youtube"]["videos"] = youtube_check_videos_change(
        group["name"], group["youtube"]["videos"], videos, group["hashtags"])

    # Same two steps for each member that has a YouTube presence.
    for member in group["members"]:
        if "youtube" not in member:
            continue
        channel_data = youtube_get_channel(api, member["youtube"]["url"])
        member["youtube"] = youtube_check_channel_change(
            member["youtube"], channel_data, member["hashtags"])
        videos = youtube_get_videos(api, member["youtube"]["playlist"],
                                    member["youtube"]["name"])
        member["youtube"]["videos"] = youtube_check_videos_change(
            member["name"], member["youtube"]["videos"], videos,
            member["hashtags"])

    print()
    return group
def __init__(self, api_key=API_KEY):
    """Remember the API key and construct the pyyoutube client with it."""
    self.key = api_key
    self.api = Api(api_key=self.key)
def __init__(self, bot):
    """Keep a bot reference, a YouTube API client, a page cursor and per-key random pools."""
    self.bot: commands.Bot = bot
    self.pagetoken = None
    self.randompool = defaultdict(list)
    # API key comes from the YOUTUBEKEY environment variable (may be None).
    self.api: Api = Api(api_key=os.environ.get("YOUTUBEKEY"))
from urllib.parse import urlparse import isodate from django.conf import settings from pyyoutube import Api YOUTUBE_KEY = settings.YOUTUBE_KEY api = Api(api_key=YOUTUBE_KEY) def get_youtube_data(url): if "youtube.com/" in url: new_data = {} query_data = urlparse(url).query video_id = "".join(query_data.split("v=")).split("&")[0] video = api.get_video_by_id(video_id=video_id) video_data = video.items[0].to_dict() title = video_data["snippet"]["title"] description = video_data["snippet"]["description"] image_thumbnails = video_data["snippet"]["thumbnails"] if image_thumbnails["maxres"] is None: preview = image_thumbnails["high"]["url"] else: preview = image_thumbnails["maxres"]["url"] duration = isodate.parse_duration( video_data["contentDetails"]["duration"] ) author = video_data["snippet"]["channelTitle"] date = video_data["snippet"]["publishedAt"] new_data.update(
def main(channel_name="YaleCourses", load_from_file=False):
    """Collect video ids of a channel and save each video's manual English
    transcript as JSON under raw_dir/transcripts/<channel_name>/.

    When load_from_file is True, the video ids are read back from the id
    buffer file instead of being fetched from the API again.
    """
    api = Api(api_key="AIzaSyCw0j0aCe0y_T42q3RLBoVtGXlTOMGGaSM")
    print("Setup dir to save the transcripts of %s channel" % (channel_name))
    channel_dir = os.path.join(raw_dir, "transcripts", channel_name)
    channel_id_file = os.path.join(raw_dir, "video_ids", channel_name + ".txt")
    if not os.path.exists(channel_dir):
        os.mkdir(channel_dir)
    else:
        # Refuse to overwrite an existing transcript folder.
        print("\tThe folder of the channel %s is already exist\n"
              "\tdelete it before executing this script -"
              "we don't want to override your data" % (channel_name))
        return
    # Since Google blocks id retrieval after a while, the ids are buffered in
    # a file so a later run can resume without re-fetching.
    if load_from_file is False:
        print("Retriving %s channel information" % (channel_name))
        channel_info = api.get_channel_info(channel_name=channel_name)
        print("\tFetch all the playlists")
        playlists = api.get_playlists(channel_id=channel_info.items[0].id,
                                      count=None)
        print("\tFetch all the videos of the playlist")
        playlist_video_pages = []
        for playlist in playlists.items:
            print("\t\tFetching videos IDs of playlist %s" % (playlist.id))
            playlist_video_pages.append(
                api.get_playlist_items(playlist_id=playlist.id, count=None))
        video_ids = []
        for page in playlist_video_pages:
            for video in page.items:
                video_ids.append(video.snippet.resourceId.videoId)
        print("We gathered now %s videos, saving save to file" %
              (len(video_ids)))
        with open(channel_id_file, 'w') as fh:
            json.dump(video_ids, fh)
    else:
        with open(channel_id_file, 'r') as fh:
            video_ids = json.load(fh)
    print("Save %s channel videos transcripts" % (channel_name))
    for video_id in video_ids:
        print("The video ID is %s" % (video_id))
        try:
            available_transcripts = YouTubeTranscriptApi.list_transcripts(
                video_id)
            video_transcripts = None
            for transcript in available_transcripts:
                # The Transcript object provides metadata properties.
                print("Video id : ", transcript.video_id)
                print("\tlanguage : %s , language code : %s" %
                      (transcript.language, transcript.language_code))
                print("\tis_generated: %s, is_translatable: %s" %
                      (transcript.is_generated, transcript.is_translatable))
                # Keep only manually created English transcripts.
                if transcript.language_code == 'en' and transcript.is_generated is False:
                    video_transcripts = transcript.fetch()
            if video_transcripts is not None:
                video_path = os.path.join(raw_dir, "transcripts",
                                          channel_name, video_id + ".json")
                with open(video_path, 'w') as outfile:
                    json.dump(video_transcripts, outfile)
        except Exception as e:
            print(e)
    print("Finish main")
from pyyoutube import Api import sys sys.path.append("./chat-replay-downloader") from chat_replay_downloader import get_chat_replay, get_youtube_messages import vtuber_list import csv import dateutil.parser import datetime import pytz import re import emoji import pandas as pd from datetime import timedelta from collections import Counter api = Api(api_key='GOOGLE_API_KEY_HERE') NAMES=['AZki', 'Miko', 'Roboco', 'Sora', 'Suisei', 'Mel', 'Haato', 'Fubuki', 'Matsuri', 'Aki', 'Shion', 'Aqua', 'Ayame', 'Choco', 'ChocoSub', 'Subaru', 'Korone', 'Mio', 'Okayu', 'Noel', 'Rushia', 'Pekora', 'Flare', 'Marine', 'Luna', 'Coco', 'Watame', 'Kanata', 'Towa', 'Lamy', 'Nene', 'Botan', 'Polka', 'Calli', 'Kiara', 'Ina', 'Gura', 'Amelia'] PLAYLIST_IDS=['UU0TXe_LYZ4scaW2XMyi5_kw', 'UU-hM6YJuNYVAmUWxeIr9FeA', 'UUDqI2jOz0weumE8s7paEk6g', 'UUp6993wxpyDPHUpavwDFqgg', 'UU5CwaMl1eIgY8h02uZw7u8A', 'UUD8HOxPs4Xvsm8H0ZxXGiBw', 'UU1CfXB_kRs3C-zaeTG3oGyg', 'UUdn5BQ06XqgXoAxIhbqw5Rg', 'UUQ0UDLQCjY0rmuxCDE38FGg', 'UUFTLzh12_nrtzqBPsTCqenA', 'UUXTpFs_3PqI41qX2d9tL2Rw', 'UU1opHUrw8rvnsadT-iGp7Cg', 'UU7fk0CB07ly8oSl0aqKkqFg', 'UU1suqwovbL1kzsoaZgFZLKg', 'UUp3tgHXw_HI0QMk1K8qh3gQ', 'UUvzGlP9oQwU--Y0r9id_jnA', 'UUhAnqc_AY5_I3Px5dig3X1Q', 'UUp-5t9SrOQwXMU7iIjQfARg', 'UUvaTdHTWBGv3MKj3KVqJVCw', 'UUdyqAaZDKHXg4Ahi7VENThQ', 'UUl_gCybOJRIgOXw6Qb4qJzQ', 'UU1DCedRgGHBdm81E1llLhOQ', 'UUvInZx9h3jC2JzsIzoOebWg', 'UUCzUftO8KOVkV4wQG1vkUvg', 'UUa9Y57gfeY0Zro_noHRVrnw', 'UUS9uQI-jC3DE0L4IpXyvr6w', 'UUqm3BQLlJfvkTsX_hvm0UmA', 'UUZlDXzGoo7d44bwdNObFacg', 'UU1uv2Oq6kNxgATlCiez59hw', 'UUFKOVgVbGmX65RxO3EtH3iw', 'UUAWSyEs_Io8MtpY3m-zqILA', 'UUUKD-uaobj9jiqB-VXt71mA', 'UUK9V2B22uJYu3N7eR_BT9QA', 'UUL_qhgtOy0dy1Agp8vkySQg', 'UUHsx4Hqa-1ORjQTh9TYDhww', 'UUMwGHR0BTZuLsmjY_NT5Pwg',
apikey = 'AIzaSyC053n6_uqpUiOd1X4YfD0Vkx1QcTL-0R8'
playlist = 'PL_MH8gOS_ETiNT1NF8B46JYHZe6fXWfVW'

from pyyoutube import Api
import random

# Collect the ids of every actual video entry in the playlist.
api = Api(api_key=apikey)
playlist_item_by_playlist = api.get_playlist_items(playlist_id=playlist,
                                                   count=None).items
videos = [
    item.snippet.resourceId.videoId for item in playlist_item_by_playlist
    if item.snippet.resourceId.kind == 'youtube#video'
]

# Pick one at random and print its watch URL.
random_video = videos[random.randint(0, len(videos) - 1)]
print('https://www.youtube.com/watch?v=' + random_video)
#!/usr/bin/env python3 from pyyoutube import Api import pytube import urllib.request from googleapiclient.discovery import build api = Api(api_key='AIzaSyBagf-_AQk4bISDaXIkyD0cGCPZCLHYHuU') playListId = "PLt0cfLFa-ZYzZB54dKA6EV2McLY26hNGJ" downLoadCount = 70 playlist_item = api.get_playlist_items(playlist_id=playListId, count = downLoadCount) #222 # Get item id and title itemList = [] for item in playlist_item.items: itemList.append([item.snippet.resourceId.videoId, item.snippet.title]) # Download videos api_key='AIzaSyBagf-_AQk4bISDaXIkyD0cGCPZCLHYHuU' youtube = build('youtube', 'v3', developerKey=api_key) itemCounts = len(itemList) print("==================================") print(itemCounts," videos found.") print("==================================") count = 0 videoSubFolder = './Video' for vid, vtitle in itemList: count = count + 1 itemurl = 'https://www.youtube.com/watch?v=' + vid print(itemurl)
import os import pandas as pd from pyyoutube import Api PLAYLIST_ID = "PLvHyFbz_PpaZ7833xPxXXPgSi50phCH-P" api = Api(api_key=os.environ["GOOGLE_API_KEY"]) def fetch_youtube_data(): video_list = [] playlist = api.get_playlist_items(playlist_id=PLAYLIST_ID) next_page = playlist.nextPageToken video_list.extend(playlist.items) while next_page: playlist = api.get_playlist_items(playlist_id=PLAYLIST_ID, page_token=next_page) next_page = playlist.nextPageToken video_list.extend(playlist.items) dates = [] titles = [] views = [] for video in video_list: videos = api.get_video_by_id( video_id=video.contentDetails.videoId).items if videos: video = videos[0] dates.append(video.snippet.publishedAt) titles.append(video.snippet.title)
class Video_Search_Json:
    """Class to retrieve videos

    This class helps query and search our video database using an API
    YouTube key if needed to pull from Youtube. Automatically updates the
    json file.
    """

    def __init__(self, API_KEY, JSON_PATH):
        # Declare an API object using our API_KEY
        self.api = Api(api_key=API_KEY)
        self.data_path = JSON_PATH
        with open(JSON_PATH) as f:
            self.database = json.load(f)

    # NOTE(review): the mutable default [Channel['MIT']] is kept for interface
    # compatibility; it is only iterated, never mutated, so it is safe here.
    def search_by_keywords(self, subtopic, channels=[Channel['MIT']],
                           num_videos=10, force_query=False,
                           include_transcripts=True):
        """Searches for videos by subtopic

        Takes in a query string to search for on youtube. Returns a JSON.

        Parameters:
        subtopic -- the subtopic to search for in the query
        channels -- a list of Channel enums to specify which channels that
        must be included (default MIT OpenCourseWare)
        num_videos -- minimum number of videos to include (default 10)
        force_query -- whether or not to query regardless of inclusion in
        json (default False)
        include_transcripts -- whether or not to include transcripts
        (default True)
        """
        # BUG FIX: the original condition was
        #   `subtopic not in self.database.keys() and not force_query`
        # so force_query=True could never trigger a query (the opposite of its
        # documented intent) and then self.database[subtopic] raised KeyError
        # for unknown subtopics. Query when the subtopic is unknown OR the
        # caller forces a refresh.
        if subtopic not in self.database or force_query:
            # YouTube retrieve
            self.query_youtube(subtopic, channels, num_videos=num_videos,
                               include_transcripts=include_transcripts)
            self.write_to_json(self.database)
        return self.database[subtopic]

    def query_youtube(self, subtopic, channels=[], num_videos=5,
                      include_transcripts=True, search_count=50):
        """Query Youtube for a subtopic

        Queries the YouTube database for videos pertaining to a certain
        subtopic. Automatically skips a video that doesn't include a
        transcript if transcripts are required.

        Parameters:
        subtopic -- the topic to search for
        channels -- specifically which Channel enums to also include from the
        Channels class (default empty)
        num_videos -- the number of videos to return. Could be more if a
        channels argument is non-empty (default 5)
        include_transcripts -- requires videos to have transcripts
        (default True)
        search_count -- the number of videos to query for every time
        (default 50)
        """
        assert num_videos >= 0  # Number of videos cannot be negative
        # Query Youtube and add videos pertaining to the subtopic
        r = self.api.search_by_keywords(q=subtopic,
                                        search_type=["video"],
                                        count=search_count,
                                        limit=search_count,
                                        video_caption="any",
                                        video_duration=["any"])
        # Add list of videos to database
        videos = []
        # Track which required channels still need a representative video.
        includes_channels = [channel_enum.name for channel_enum in channels]
        # Maximum amount of videos to include (one video from each channel
        # plus the top num_videos from the results).
        video_counter = num_videos
        for vid in r.items:
            should_append = False
            # Stop once the minimum count and all required channels are met.
            if video_counter <= 0 and len(includes_channels) == 0:
                break
            # Filter the video from the YouTube API
            filtered_video = self.filter_video_information(vid.to_dict())
            if filtered_video["channelId"] in includes_channels:
                # Remove minimum one-video-from-channel requirement once met.
                includes_channels.remove(filtered_video["channelId"])
                should_append = True
            elif video_counter > 0:
                # Still need videos to reach the minimum number.
                should_append = True
            if not should_append:
                continue
            # Include transcripts if specified; drop videos without one.
            if include_transcripts:
                filtered_video["transcript"] = self.get_youtube_transcript(
                    filtered_video["videoId"])
                if filtered_video["transcript"] is None:
                    continue
            # Add in other fields
            filtered_video["url"] = "www.youtube.com/watch?v=" + \
                filtered_video["videoId"]
            filtered_video["source"] = "Youtube"
            filtered_video["difficulty"] = 3  # Default difficulty level
            videos.append(filtered_video)
            video_counter -= 1  # Decrement toward the minimum video count
        # Add filtered videos into the database (mutates)
        self.database[subtopic] = videos

    def filter_video_information(self, video,
                                 keys=["publishedAt", "channelId", "title",
                                       "description", "channelTitle",
                                       "videoId"]):
        """Filters video dict for certain keys

        Filters a YouTube Video entry to only include a certain number of
        keys specified by a keys list taken from the YouTube API.

        Parameters:
        video -- the video information as a dictionary to filter through
        keys -- the keys to include (default ["publishedAt", "channelId",
        "title", "description", "channelTitle", "videoId"])
        """
        new_video = {}
        # Recursively flatten nested dictionaries onto the first layer.
        self.recur_dict(video, new_video, keys)
        return new_video

    def get_youtube_transcript(self, video_id):
        """Returns video's transcript from YouTube

        Returns the video's transcript given the video_id on YouTube.
        Returns None if no transcript was found; this doubles as the check
        for whether a video has a transcript at all.

        Parameters:
        video_id -- the id of the video on YouTube. Can be found after the
        "v=" part in the link.
        """
        raw_trans = []
        try:
            raw_trans = YouTubeTranscriptApi.get_transcript(video_id,
                                                            languages=["en"])
        except Exception:
            print(video_id, "excluded")
            return None
        # Use only the text portion of transcript; timing info is discarded.
        transcript = ""
        for i in raw_trans:
            transcript += i["text"] + " "
        return transcript

    def recur_dict(self, data, output, keys_to_include):
        """Recursively loop through nested dict

        Recursive function for reading nested dictionaries and retrieving
        the keys to a one-layer dictionary.
        """
        for key, value in data.items():
            if isinstance(value, dict):
                # If the value is a dict, recursively loop
                self.recur_dict(value, output, keys_to_include)
            elif key in keys_to_include:
                # If value is not a dictionary, add to new dict
                output[key] = value

    def write_to_json(self, data_dict):
        """Write dictionary to JSON file

        Writes the file into a JSON and includes Exception protection and
        null dictionary protection.

        Parameters:
        data_dict -- the dictionary to write to json
        """
        try:
            # Only write data to JSON if it is non Null
            if data_dict:
                with open(self.data_path, 'w') as json_file:
                    json.dump(data_dict, json_file)
        except (json.decoder.JSONDecodeError):
            print("Error Writing to Json File, Dictionary improperly formatted.")