예제 #1
0
def main():
    """Fetch statistics, comments and captions for one video and run a search.

    Returns:
        tuple: ``(Result1, Result2)``. ``Result1`` is a dict with the keys
        ``Num_likes``, ``Num_dislikes``, ``Num_comments``, ``Transcript`` and
        ``Commentlist`` for the video; ``Result2`` is whatever ``yt.search``
        returns for the query ``'vaccine'`` published between 2018-01-01 and
        2019-01-01.
    """
    # Imported locally, next to the other local import: the search call below
    # needs the datetime module and nothing else brings it into scope here.
    import datetime

    from youtube_api import YouTubeDataAPI

    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration or the environment instead.
    api_key = "AIzaSyAB5Dg1HeqzlV6y2d37-tektKrts2bNOSc"
    yt = YouTubeDataAPI(api_key)

    # The same video is queried three times; keep its id in one place.
    video_id = 'k7DGeWlKu0Q'

    zout = yt.get_video_metadata(video_id=video_id)
    Num_likes = zout['video_like_count']
    Num_dislikes = zout['video_dislike_count']
    Num_comments = zout['video_comment_count']

    zcomment = yt.get_video_comments(video_id=video_id)
    Commentlist = [item['text'] for item in zcomment]

    zcaption = yt.get_captions(video_id=video_id)
    Transcript = zcaption['caption']

    Result1 = {
        'Num_likes': Num_likes,
        'Num_dislikes': Num_dislikes,
        'Num_comments': Num_comments,
        'Transcript': Transcript,
        'Commentlist': Commentlist
    }
    Result2 = yt.search(q='vaccine',
                        published_after=datetime.datetime(2018, 1, 1),
                        published_before=datetime.datetime(2019, 1, 1))
    return Result1, Result2
                              datetime.datetime(2019, 6, 30)))

# Convert the search results into a pandas DataFrame.
df_searches_2019 = pd.DataFrame(searches_2019)

# Show the data for the videos. The three bare expressions below discard
# their results; they only produce output when evaluated as the last
# expression of an interactive/notebook cell.
pd.options.display.max_rows = 100
df_searches_2019.to_string(index=False)
df_searches_2019.iloc[:, 0].tolist()
df_searches_2019

# Fetch more information about each video, looked up by the value in the
# first DataFrame column (assumed to be the video_id column - TODO confirm).
searches_2019_more_info = []
video_id_2019 = df_searches_2019.iloc[:, 0]
# NOTE(review): number_of_results is defined outside this excerpt; the loop
# assumes the DataFrame has at least that many rows.
for i in range(0, number_of_results):
    searches_2019_more_info_singluar = yt.get_video_metadata(
        video_id_2019[i], part=['statistics', 'snippet'])
    # Remember the position of the video within the search results.
    searches_2019_more_info_singluar['number'] = i
    searches_2019_more_info.append(searches_2019_more_info_singluar)

# Convert the collected metadata into a pandas DataFrame and display it
# (again a bare expression, see the note above).
searches_2019_more_info = pd.DataFrame(searches_2019_more_info)
searches_2019_more_info

# Counters for how many times the description is empty, or one of the words
# "Para, Paralympic, Adaptive, Adapted, Disabled, Disability, Differently
# abled, Disability friendly, Wheelchair Accessible, and Inclusive" appeared.
# The search is case sensitive, so each word is also counted in lowercase.
None_2019 = 0
Para_2019 = 0
para_2019 = 0
Paralympic_2019 = 0
paralympic_2019 = 0
Adaptive_2019 = 0
예제 #3
0
class SuperchatArchiver:
    """Record the superchats and super stickers of one YouTube video's chat.

    Construction fetches the video's metadata through the YouTube Data API and
    creates the directories ``<channel_id>/sc_logs`` and
    ``<channel_id>/vid_stats``.  ``main`` then follows the chat until it ends
    and writes the collected messages and statistics there as JSON.
    """

    def __init__(self, vid_id, api_key, gen_WC=False, loop=None):
        """Fetch the video's metadata and prepare the output directories.

        Args:
            vid_id: YouTube video id to record.
            api_key: key handed to ``YouTubeDataAPI``.
            gen_WC: when true, ``main`` generates a word cloud after a run
                that finished without retries and recorded messages.
            loop: asyncio event loop used for executor calls; ``main`` falls
                back to the running loop when this is falsy.

        Raises:
            SystemExit: with code -1 when the metadata could not be fetched.
        """
        self.cancelled = False
        self.loop = loop
        self.t_pool = concurrent.futures.ThreadPoolExecutor(max_workers=5)
        self.api_points_used = 1.0
        self.api = YouTubeDataAPI(api_key)  # uses 1p to check key
        self.videoid = vid_id
        self.channel_id = ""
        self.metadata = {}
        self.videoinfo = {}
        self.stats = []
        self.dict_list = []
        self.gen_wc = gen_WC
        # Currency symbols as they appear in chat -> three-letter codes.
        self.clean_currency = {"¥": "JPY",
                               "NT$": "TWD",
                               "$": "USD",
                               "CA$": "CAD",
                               "MX$": "MXN",
                               "HK$": "HKD",
                               "A$": "AUD",
                               "£": "GBP",
                               "€": "EUR",
                               "R$": "BRL",
                               "₹": "INR",
                               "\u20b1": "PHP"}

        self.metadata = self.get_video_info(self.videoid)
        self.api_points_used += 1.0
        self.running = True
        # Check for a failed lookup BEFORE touching the metadata; subscripting
        # None first would raise TypeError and make this exit path unreachable.
        if self.metadata is None:
            raise SystemExit(-1)
        self.videoinfo = self.metadata
        self.channel_id = self.metadata["channelId"]

        pathlib.Path('./' + self.channel_id + '/sc_logs').mkdir(parents=True, exist_ok=True)
        pathlib.Path('./' + self.channel_id + '/vid_stats').mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _utc_timestamp(value):
        """Convert a ``YYYY-MM-DDTHH:MM:SSZ`` string into a POSIX timestamp (UTC)."""
        return datetime.strptime(value + " +0000", "%Y-%m-%dT%H:%M:%SZ %z").timestamp()

    def get_video_info(self, video_ID: str):
        """Fetch and condense the API metadata of ``video_ID``.

        Returns:
            dict with the keys ``channel``, ``channelId``, ``id``, ``title``,
            ``live``, ``caught_while``, ``publishDateTime``, ``length`` and,
            when the response has it, ``liveStreamingDetails`` (its
            time-valued entries converted to POSIX timestamps).  ``None`` when
            the request or the parsing fails; the error is printed.
        """
        try:
            response = self.api.get_video_metadata(video_id=video_ID, parser=None,
                                                   part=["liveStreamingDetails", "contentDetails", "snippet"])
            snippet = response["snippet"]
            api_metadata = {"channel": snippet["channelTitle"],
                            "channelId": snippet["channelId"],
                            "id": video_ID,
                            "title": snippet["title"],
                            "live": snippet["liveBroadcastContent"],
                            # Broadcast state at the time of this lookup; main()
                            # preserves the first observed value across refreshes.
                            "caught_while": snippet["liveBroadcastContent"],
                            "publishDateTime": self._utc_timestamp(snippet["publishedAt"])}
            delta = isodate.parse_duration(response["contentDetails"]["duration"])
            api_metadata["length"] = delta.total_seconds()
            if 'liveStreamingDetails' in response:
                api_metadata["liveStreamingDetails"] = {}
                for key, value in response["liveStreamingDetails"].items():
                    # Only the time-valued entries are kept.
                    if "Time" in key or "time" in key:
                        api_metadata["liveStreamingDetails"][key] = self._utc_timestamp(value)
            return api_metadata

        except Exception as e:
            # Best effort: callers treat None as "metadata unavailable".
            print(self.videoid)
            print(e)
            return None

    async def async_get_video_info(self, video_ID: str):
        """Run ``get_video_info`` in the thread pool and count the API call.

        Returns the metadata dict, or ``None`` when the lookup failed; the
        stored channel id is only refreshed on success.
        """
        api_metadata = await self.loop.run_in_executor(self.t_pool, self.get_video_info, video_ID)
        self.api_points_used += 1.0
        if api_metadata is not None:
            self.channel_id = api_metadata["channelId"]
        return api_metadata

    def cancel(self):
        """Ask ``main`` to stop at its next check of the cancel flag."""
        self.cancelled = True

    async def main(self):
        """Record the chat until it ends and write the results to disk.

        The recording is repeated while the chat monitor ended although the
        API still reports the video as upcoming/live; the files of such a
        failed attempt are renamed with an ``.err<N>`` suffix.  Videos that
        are neither a broadcast nor a premiere are reported once and skipped.

        Raises:
            SystemExit: with code -1 when the video was first seen as
                upcoming/live and its metadata cannot be refreshed afterwards.
        """
        if not self.loop:
            self.loop = asyncio.get_running_loop()
        self.chat_err = True
        retries = 0
        while self.chat_err and not self.cancelled:
            if "liveStreamingDetails" not in self.videoinfo and self.videoinfo["live"] == "none":
                await self.log_output(self.videoinfo["title"]+" is not a broadcast recording or premiere")
                # Nothing to record: leave the loop instead of logging this
                # message forever (chat_err was never cleared on this path).
                self.chat_err = False
                break

            self.stats.clear()
            self.chat_err = False
            sc_path = self.channel_id+"/sc_logs/"+self.videoid + ".txt"
            stats_path = self.channel_id+"/vid_stats/"+self.videoid + "_stats.txt"
            test_file = pathlib.Path(sc_path)
            # A log larger than an empty JSON list ("[]") counts as done.
            if test_file.is_file() and test_file.stat().st_size > 2:
                await self.log_output(self.videoinfo["channel"]+" - " + self.videoinfo["title"]+" already analyzed, skipping. Existing file size: "+str(test_file.stat().st_size)+" bytes")
                break

            analysis_ts = datetime.now(tz=pytz.timezone('Europe/Berlin'))
            await self.log_output("Started Analysis at: "+analysis_ts.isoformat())
            await self.log_output("Analyzing Video " + datetime.fromtimestamp(self.videoinfo["publishDateTime"],timezone.utc).isoformat() + " " +self.videoinfo["channel"]+" - " + self.videoinfo["title"] + " ["+self.videoid+"]")
            chat = LiveChatAsync(self.videoid, callback=self.display,
                                 processor=(SuperChatLogProcessor(), SuperchatCalculator()))
            while chat.is_alive() and not self.cancelled:
                await asyncio.sleep(3)

            # When livestream chat parsing ends, get some more metadata.
            newmetadata = await self.async_get_video_info(self.videoid)
            # The refresh may fail (None); only inspect it when it succeeded.
            if newmetadata is not None and newmetadata["live"] in ["upcoming","live"]:
                # The livestream has not ended yet, so the monitor died early.
                await self.log_output(datetime.now(tz=pytz.timezone('Europe/Berlin')).isoformat()+": Error! Chat monitor ended prematurely!")
                await self.log_output(chat.is_alive())
                self.chat_err = True
            if self.videoinfo["caught_while"] in ["upcoming","live"]:
                if newmetadata is None:
                    raise SystemExit(-1)
                # Use the newer metadata while rescuing certain fields from
                # the old metadata.
                createdDateTime = self.videoinfo["publishDateTime"]
                caught_while = self.videoinfo["caught_while"]
                old_title = self.videoinfo["title"]
                self.videoinfo = newmetadata
                self.videoinfo["createdDateTime"] = createdDateTime
                self.videoinfo["caught_while"] = caught_while
                if self.videoinfo["title"] != old_title:
                    self.videoinfo["old_title"] = old_title
            self.videoinfo["startedLogAt"] = analysis_ts.timestamp()

            await self.log_output("writing to files")
            # Files are opened only now and through context managers, so a
            # failure above can neither leak a handle nor truncate old output.
            with open(sc_path, "w") as f:
                f.write(json.dumps(self.dict_list))
            if self.chat_err:
                self.stats.append(await self.loop.run_in_executor(self.t_pool,recount_money,self.dict_list))
            with open(stats_path, "w") as f_stats:
                f_stats.write(json.dumps([self.videoinfo,self.stats[-1:]]))
            if self.chat_err:
                # Keep the incomplete attempt under a distinct name and retry.
                os.rename(sc_path, sc_path+".err"+str(retries))
                os.rename(stats_path, stats_path + ".err"+str(retries))
                retries += 1
            if not self.chat_err and retries == 0 and self.gen_wc and len(self.dict_list) > 0:
                await self.loop.run_in_executor(self.t_pool,self.generate_wordcloud,sc_path)

    async def display(self, data, amount):
        """Chat callback: store every superchat/super sticker of one chat chunk.

        Args:
            data: chat chunk; its ``items`` are the individual chat messages.
            amount: value passed along with each chunk (presumably the running
                totals of ``SuperchatCalculator`` - verify); it is appended to
                ``self.stats`` once per stored message and must provide the
                key ``"amount_sc"``.
        """
        if not data.items:
            return
        for c in data.items:
            if c.type not in ("superChat", "superSticker"):
                continue
            # Replace known currency symbols by their three-letter code.
            c.currency = self.clean_currency.get(c.currency, c.currency)
            # c.timestamp is divided by 1000 here, i.e. treated as milliseconds.
            sc_datetime = datetime.fromtimestamp(c.timestamp/1000.0,timezone.utc)
            sc_info = {"time": c.timestamp,
                       "currency": c.currency.replace(u'\xa0', ''),
                       "value": c.amountValue,
                       "weekday": sc_datetime.weekday(),
                       "hour": sc_datetime.hour,
                       "minute": sc_datetime.minute,
                       "user": c.author.name,
                       "userid": c.author.channelId,
                       "message": c.message,
                       "color": c.bgColor,
                       "debugtime": sc_datetime.isoformat()}
            self.stats.append(amount)
            self.dict_list.append(sc_info)
        await self.log_output(
            self.videoinfo["channel"] + " " + self.videoinfo["title"] + " " + data.items[-1].elapsedTime + " " +
            str(amount["amount_sc"]))

    def generate_wordcloud(self, filepath):
        """Build a word cloud from the superchat log stored at ``filepath``."""
        wordcloudmake = superchat_wordcloud(filepath)
        wordcloudmake.generate()

    async def log_output(self, logmsg):
        """Print ``logmsg`` from the thread pool so the event loop is not blocked."""
        await self.loop.run_in_executor(self.t_pool,print,logmsg)
예제 #4
0
# Topic information: display name of the topic and the search terms used for it.
TOPIC = 'Coronavirus'
SEARCH_TERMS = ['coronavirus']
# Further, more developed search terms that can be added here:
# ['Judy Mikovits','Bill Gates coronavirus','QAnon coronavirus','Coronavirus Vaccination',
#                'Wuhan lab', 'bioweapon coronavirus','5G coronavirus','coronavirus flu'
#                 ,'dean koontz darkness']
VIDEOS_PER_QUERY = 10

# Create the YouTube API client and check the key.
# NOTE: 'your api key' is a placeholder and must be replaced with a real key.
API_KEY = 'your api key'

yt_api = YouTubeDataAPI(API_KEY)
yt_api.verify_key()

# Thin wrapper that fetches the metadata for a list of video ids through the
# module-level client.
Metadata = lambda vid_list: yt_api.get_video_metadata(vid_list)
''' collect videos' metadata
'''


def get_metadata(vid_list, VID_SEEN):
    """Fetch the metadata for a batch of video ids.

    Args:
        vid_list: video ids to look up; a falsy value (empty list, ``None``)
            short-circuits without calling the API.
        VID_SEEN: accepted for backward compatibility only.  It is neither
            read nor updated: the previous ``set(list(VID_SEEN))`` rebinding
            was local to this function and its result was discarded.

    Returns:
        tuple: ``(metadata_list, [n])`` where ``n`` is the number of requested
        ids, or ``([], [0])`` when nothing was requested or the lookup failed
        (the failure is printed).
    """
    if not vid_list:
        return [], [0]
    try:
        metadata_list = Metadata(vid_list)
    except Exception as exc:
        # Best effort: report the failed batch and let the caller continue.
        print('>>> cannot retrieve the metadata_list for [{}...] as {}'.format(
            vid_list[0], exc))
        return [], [0]
    return metadata_list, [len(vid_list)]
예제 #5
0
# For each playlist in the array, create a directory named after the
# playlist (relative to the current working directory) and download each of
# its videos into that directory.
for playlist in playlists:
    playlist_dir = playlist['title']
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(playlist_dir, exist_ok=True)

    # Change directory so that the relative output template below saves the
    # videos in the proper place.
    os.chdir(playlist_dir)
    try:
        # Download all episodes for this playlist.  A separate name is used so
        # the loop variable is not rebound inside its own loop.
        playlist_videos = yt.get_videos_from_playlist_id(playlist['playlist_id'])
        for videoix, v in enumerate(playlist_videos):
            try:
                video = yt.get_video_metadata(v['video_id'])
                link = 'http://youtube.com/watch?v=' + video['video_id']
            except Exception as e:
                # Best effort: skip this video and continue with the next one.
                print("Exception on get_video_metadata: %s" % e)
                continue
            try:
                # Prefix the file name with the 1-based position in the playlist.
                with youtube_dl.YoutubeDL(
                    {'outtmpl':
                     str(videoix + 1) + '. %(title)s.%(ext)s'}) as ydl:
                    ydl.download([link])
            except Exception as e:
                print("Exception on download: %s" % e)
    finally:
        # Switch back to the root dir of the project, even when listing the
        # playlist fails, so later relative paths are not resolved inside a
        # playlist folder.
        os.chdir(root_dir)
class SuperchatArchiver:
    def __init__(self, vid_id, api_key, gen_WC=False, loop=None, file_suffix=".standalone.txt",
                 minutes_wait=30, retry_attempts=72, min_successful_attempts=2, logger=None):
        """Fetch the video's metadata and set up output paths and logging.

        Args:
            vid_id: YouTube video id to record.
            api_key: key handed to ``YouTubeDataAPI``.
            gen_WC: stored as ``self.gen_wc``.
            loop: asyncio event loop used for executor calls (may be ``None``).
            file_suffix: appended to the name of every output file.
            minutes_wait: stored as ``self.minutes_wait``.
            retry_attempts: stored as ``self.max_retry_attempts``.
            min_successful_attempts: stored as ``self.min_successful_attempts``.
            logger: logger to use; when falsy, a module logger writing to
                ``<channel_id>/<vid_id>.applog`` and to the console is set up.
        """
        self.total_counted_msgs = 0
        self.total_new_members = 0
        self.max_retry_attempts = retry_attempts
        self.min_successful_attempts = min_successful_attempts
        self.file_suffix = file_suffix
        self.minutes_wait = minutes_wait
        self.started_at = None
        self.ended_at = None
        self.cancelled = False
        self.loop = loop
        self.t_pool = concurrent.futures.ThreadPoolExecutor(max_workers=100)
        self.api_points_used = 1.0
        self.api = YouTubeDataAPI(api_key)  # uses 1p to check key
        self.videoid = vid_id
        self.channel_id = ""
        self.metadata = {}
        self.videoinfo = {}
        self.donors = {}
        self.stats = []
        self.sc_msgs = set()
        self.sc_logs_list = []
        self.metadata_list = []
        self.gen_wc = gen_WC
        self.unique_donors = {}
        # Currency symbols as they appear in chat -> three-letter codes.
        self.clean_currency = {"¥": "JPY",
                               "NT$": "TWD",
                               "$": "USD",
                               "CA$": "CAD",
                               "MX$": "MXN",
                               "HK$": "HKD",
                               "A$": "AUD",
                               "£": "GBP",
                               "€": "EUR",
                               "R$": "BRL",
                               "₹": "INR",
                               "\u20b1": "PHP",
                               "\u20aa": "ILS"}

        self.metadata = self.get_video_info(self.videoid)
        self.api_points_used += 1.0
        self.total_member_msgs = 0
        self.running = True
        self.running_chat = None
        if self.metadata is not None:
            self.videoinfo = self.metadata
            self.videoinfo["retries_of_rerecording_had_scs"] = 0
            self.videoinfo["retries_of_rerecording"] = 0
            self.videoPostedAt = self.videoinfo["publishDateTime"]
            self.channel_id = self.metadata["channelId"]
        else:
            # Metadata lookup failed: fall back to a placeholder channel
            # directory; self.videoinfo stays empty.
            self.videoPostedAt = 0
            self.channel_id = "privatted-deleted-memebershipped"
        # Template holding every metadata key with an empty value.
        self.skeleton_dict = {"channel": None,
                              "channelId": None,
                              "id": None,
                              "title": None,
                              "live": None,
                              "caught_while": None,
                              "publishDateTime": None,
                              "length": None,
                              "endedLogAt": None,
                              "retries_of_rerecording": None,
                              "retries_of_rerecording_had_scs": None,
                              "createdDateTime": None,
                              "liveStreamingDetails": {"scheduledStartTime": None,
                                                       "actualStartTime": None,
                                                       "actualEndTime": None}
                              }
        self.sc_file = self.channel_id + "/sc_logs/" + self.videoid + ".txt" + self.file_suffix
        self.donor_file = self.channel_id + "/vid_stats/donors/" + self.videoid + ".txt" + self.file_suffix
        self.stats_file = self.channel_id + "/vid_stats/" + self.videoid + "_stats.txt" + self.file_suffix
        pathlib.Path('./' + self.channel_id + '/vid_stats/donors').mkdir(parents=True, exist_ok=True)
        pathlib.Path('./' + self.channel_id + '/sc_logs').mkdir(parents=True, exist_ok=True)
        self.placeholders = 0
        if logger:
            self.logger = logger
        else:
            self.logger = logging.getLogger(__name__)
            self.logger.setLevel(logging.DEBUG)
            # Name the log file after the video id passed to this constructor
            # rather than a script-level ``args`` global, which does not exist
            # when the class is used from another module.
            fh = logging.FileHandler('./' + self.channel_id + "/" + self.videoid + '.applog')
            fh.setLevel(logging.DEBUG)
            ch = logging.StreamHandler()
            ch.setLevel(logging.INFO)
            dbg_formatter = config.mylogger.MyFormatter()
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            fh.setFormatter(dbg_formatter)
            ch.setFormatter(formatter)
            # NOTE(review): the logger is shared per module name, so every
            # instance created without a logger adds two more handlers.
            self.logger.addHandler(fh)
            self.logger.addHandler(ch)

    def __str__(self):
        return "["+self.videoid+"] " + self.videoinfo["channel"] + " - " + self.videoinfo["title"] + " - Running: "+str(self.running) + " Live: " + self.videoinfo["live"]
    
    def __repr__(self):
        return "["+self.videoid+"] " + self.videoinfo["channel"] + " - " + self.videoinfo["title"] + " - Running: "+str(self.running) + " Live: " + self.videoinfo["live"]

    def get_video_info(self,video_ID:str):
        response = None
        try:
            response = self.api.get_video_metadata(video_id=video_ID, parser=None,
                                                   part=["liveStreamingDetails", "contentDetails", "snippet"])
            api_metadata = {"channel": response["snippet"]["channelTitle"],
                            "channelId": response["snippet"]["channelId"],
                            "id": video_ID,
                            "title": response["snippet"]["title"],
                            "live": response["snippet"]["liveBroadcastContent"],
                            "caught_while": response["snippet"]["liveBroadcastContent"],
                            "publishDateTime": datetime.strptime(response["snippet"]["publishedAt"] + " +0000",
                                                                 "%Y-%m-%dT%H:%M:%SZ %z").timestamp()}
            delta = isodate.parse_duration(response["contentDetails"]["duration"])
            api_metadata["length"] = delta.total_seconds()
            if 'liveStreamingDetails' in response.keys():
                api_metadata["liveStreamingDetails"] = {}
                for d in response["liveStreamingDetails"].keys():
                    if "Time" in d or "time" in d:
                        api_metadata["liveStreamingDetails"][d] = datetime.strptime(
                            response["liveStreamingDetails"][d] + " +0000", "%Y-%m-%dT%H:%M:%SZ %z").timestamp()
            return api_metadata

        except Exception as e:
            print(self.videoid)
            print(e)
            print(response)
            return None

    async def async_get_video_info(self,video_ID:str):
        self.api_points_used += 1.0
        api_metadata = await self.loop.run_in_executor(self.t_pool,self.get_video_info,video_ID)
        return api_metadata

    def cancel(self):
        self.cancelled = True
        if self.running_chat:
            self.running_chat.terminate()
        

    async def update_psql_metadata(self):
        async with self.conn.transaction():
            await self.conn.execute(
                "UPDATE video SET caught_while = $2, live = $3, title = $4,"
                "retries_of_rerecording = $5, retries_of_rerecording_had_scs = $6 WHERE video_id = $1",
                self.videoid, self.videoinfo["caught_while"], self.videoinfo["live"],
                self.videoinfo["title"], self.videoinfo["retries_of_rerecording"],
                self.videoinfo["retries_of_rerecording_had_scs"])
            if "scheduledStartTime" in self.videoinfo["liveStreamingDetails"].keys():
                await self.conn.execute("UPDATE video SET scheduledstarttime = $2 WHERE video_id = $1",
                                        self.videoid, datetime.fromtimestamp(
                        self.videoinfo["liveStreamingDetails"]["scheduledStartTime"], timezone.utc))
            if "actualStartTime" in self.videoinfo["liveStreamingDetails"].keys():
                await self.conn.execute("UPDATE video SET actualstarttime = $2 WHERE video_id = $1",
                                        self.videoid, datetime.fromtimestamp(
                        self.videoinfo["liveStreamingDetails"]["actualStartTime"], timezone.utc))
            if "actualEndTime" in self.videoinfo["liveStreamingDetails"].keys():
                await self.conn.execute("UPDATE video SET actualendtime = $2 WHERE video_id = $1",
                                        self.videoid, datetime.fromtimestamp(
                        self.videoinfo["liveStreamingDetails"]["actualEndTime"], timezone.utc))
            if "old_title" in self.videoinfo.keys():
                await self.conn.execute("UPDATE video SET old_title = $2 WHERE  video_id = $1", self.videoid,
                                        self.videoinfo["old_title"])
            if "length" in self.videoinfo.keys():
                await self.conn.execute("UPDATE video SET length = $2 WHERE  video_id = $1", self.videoid,
                                        self.videoinfo["length"])
            if "publishDateTime" in self.videoinfo.keys():
                await self.conn.execute("UPDATE video SET publishDateTime = $2 WHERE video_id = $1",
                                        self.videoid, datetime.fromtimestamp(self.videoinfo["publishDateTime"],
                                                                             timezone.utc))
            if "endedLogAt" in self.videoinfo.keys():
                await self.conn.execute("UPDATE video SET endedLogAt = $2 WHERE video_id = $1",
                                        self.videoid, self.ended_at)
                
    async def already_done(self,conn):
        row = await conn.fetchrow('SELECT retries_of_rerecording_had_scs, retries_of_rerecording FROM video WHERE video_id = $1', self.videoid)
        successful_sc_recordings = 0
        repeats = 0
        if row:
            successful_sc_recordings = row["retries_of_rerecording_had_scs"] if row["retries_of_rerecording_had_scs"] else 0
            repeats = row["retries_of_rerecording"] if row["retries_of_rerecording"] else 0
        test_file = pathlib.Path(self.sc_file)
        file_has_content = False
        if test_file.is_file():
            if test_file.stat().st_size > 2:
                file_has_content = True
        if successful_sc_recordings >= 2 and file_has_content:
            return True, test_file.stat().st_size, successful_sc_recordings, repeats
        else:
            return False, 0, successful_sc_recordings, repeats

    async def main(self):
        if not self.loop:
            self.loop = asyncio.get_running_loop()
        await self.log_output(self.videoinfo,10)
        pgsql_config_file = open("postgres-config.json")
        pgsql_creds = json.load(pgsql_config_file)
        self.conn = await asyncpg.connect(user = pgsql_creds["username"], password = pgsql_creds["password"], host = pgsql_creds["host"], database = pgsql_creds["database"])
        old_meta_row = await self.conn.fetchrow('SELECT c.name, channel_id, title, caught_while, live, old_title, length, createdDateTime, publishDateTime, startedLogAt, endedLogAt, scheduledStartTime, actualStartTime, actualEndTime, retries_of_rerecording, retries_of_rerecording_had_scs FROM video INNER JOIN channel c on channel_id = c.id WHERE video_id = $1', self.videoid)
        old_meta = dict(old_meta_row) if old_meta_row else None
        if old_meta:
            old_time_meta = {"scheduledStartTime": old_meta["scheduledstarttime"].timestamp() if old_meta["scheduledstarttime"] else 0,
                             "actualStartTime": old_meta["actualstarttime"].timestamp() if old_meta["actualstarttime"] else 0,
                             "actualEndTime": old_meta["actualendtime"].timestamp() if old_meta["actualendtime"] else 0}
            if self.videoinfo:
                for time in old_time_meta.keys():
                    if "liveStreamingDetails" in self.videoinfo.keys():
                        if time in self.videoinfo["liveStreamingDetails"].keys():
                            if not old_time_meta[time] and self.videoinfo["liveStreamingDetails"][time]:
                                old_time_meta[time] = self.videoinfo["liveStreamingDetails"][time]
            time_meta_keys = list(old_time_meta.keys())
            for timekey in time_meta_keys:
                if not old_time_meta[timekey]:
                    old_time_meta.pop(timekey)
            old_meta["liveStreamingDetails"] = old_time_meta
            if not self.videoinfo:
                self.videoinfo = copy.deepcopy(self.skeleton_dict)
            await self.log_output(self.videoinfo,10)
            if self.videoinfo["title"] != old_meta["title"] and self.videoinfo["title"]:
                old_meta["old_title"] = old_meta["title"]
                old_meta["title"] = self.videoinfo["title"]
            old_meta_keys_l = [k.lower() for k in old_meta.keys()]
            old_meta_keys_n = [k for k in old_meta.keys()]
            old_meta_keys = dict(zip(old_meta_keys_l, old_meta_keys_n))
            #await self.log_output(old_meta_keys,10)
            for info in self.skeleton_dict.keys():
                if info.lower() in old_meta_keys_l:
                    if type(old_meta[old_meta_keys[info.lower()]]) is datetime:
                        self.videoinfo[info] = old_meta[old_meta_keys[info.lower()]].timestamp()
                    elif old_meta[old_meta_keys[info.lower()]]:
                        self.videoinfo[info] = old_meta[old_meta_keys[info.lower()]]
                    elif old_meta[old_meta_keys[info.lower()]] is None and "time" in info.lower():
                        if info in self.videoinfo.keys():
                            await self.log_output((info,"key found", self.videoinfo[info],self.videoinfo.keys()))
                            self.videoinfo[info] = self.videoinfo[info] if self.videoinfo[info] else 0
                        else:
                            await self.log_output((info,"key not found",self.videoinfo[info],self.videoinfo.keys()))
                            self.videoinfo[info] = 0
                    else:
                        await self.log_output("else case",10)
            self.channel_id = old_meta["channel_id"]
            self.videoinfo["channel"] = old_meta["name"]
            self.videoinfo["channelId"] = self.channel_id
            self.videoinfo["id"] = self.videoid
            self.videoPostedAt = self.videoinfo['publishDateTime']
            self.metadata_list.append(self.videoinfo)
            self.ended_at = old_meta["endedlogat"] if old_meta["endedlogat"] else None
            self.videoinfo["endedLogAt"] = self.ended_at.timestamp() if self.ended_at else None
            if self.metadata:
                self.videoinfo["live"] = self.metadata["live"]
        await self.log_output(self.videoinfo)
        if not self.videoinfo:
            await self.conn.close()
            return
        self.insert_channels = await self.conn.prepare("INSERT INTO channel(id, name, tracked) VALUES ($1,$2,$3) "
                                                       "ON CONFLICT DO NOTHING")
        self.channel_name_history = await self.conn.prepare("INSERT INTO chan_names(id, name, time_discovered, time_used) "
                                                            "VALUES ($1,$2,$3,$4) ON CONFLICT (id,name) DO UPDATE SET time_used = $4")
        self.insert_messages = await self.conn.prepare("INSERT INTO messages(video_id, chat_id, user_id, message_txt, "
                                                       "time_sent, currency, value, color) "
                                                       "VALUES ($1,$2,$3,$4,$5,$6,$7,$8) ON CONFLICT DO NOTHING")
        async with self.conn.transaction():
            if self.channel_id and self.videoinfo["channel"]:
                await self.conn.execute("INSERT INTO channel VALUES($1,$2,$3) ON CONFLICT (id) DO UPDATE SET tracked = $3",
                                       self.channel_id, self.videoinfo["channel"], True)
                await self.conn.execute("INSERT INTO chan_names VALUES($1,$2,$3) ON CONFLICT DO NOTHING",
                                        self.channel_id, self.videoinfo["channel"],
                                        datetime.now(tz=pytz.timezone('Europe/Berlin')))
        self.chat_err = True
        repeats = 0
        log_exist_test, filesize, db_retries_had_scs, repeats = await self.already_done(self.conn)
        self.videoinfo["retries_of_rerecording_had_scs"] = db_retries_had_scs
        self.videoinfo["retries_of_rerecording"] = repeats
        if log_exist_test:
            await self.log_output(self.videoinfo["channel"] + " - " + self.videoinfo[
                    "title"] + " already analyzed, skipping. Existing file size: " + str(
                    filesize) + " bytes")
            return
        had_scs = db_retries_had_scs if db_retries_had_scs else 0
        self.msg_counter = 0
        islive = True
        while (repeats < self.max_retry_attempts and had_scs < self.min_successful_attempts and not self.cancelled and islive):
            self.msg_counter = 0
            self.chat_err = True
            if self.metadata:
                islive = self.metadata["live"] in ["upcoming","live"]
            while self.chat_err and not self.cancelled:
                if "liveStreamingDetails" in self.videoinfo.keys() or self.videoinfo["live"] != "none" or repeats >= 1:
                    self.stats.clear()
                    self.chat_err = False
                    self.started_at = datetime.now(tz=pytz.timezone('Europe/Berlin'))
                    publishtime = datetime.fromtimestamp(self.videoPostedAt,timezone.utc)
                    async with self.conn.transaction():
                        await self.conn.execute(
                            "INSERT INTO video (video_id,channel_id,title,startedlogat,createddatetime) "
                            "VALUES($1,$2,$3,$4,$5) ON CONFLICT DO NOTHING",
                            self.videoid, self.videoinfo["channelId"], self.videoinfo["title"], self.started_at, publishtime)
                    await self.update_psql_metadata()
                    await self.log_output("Started Analysis #"+str(repeats+1)+" at: "+self.started_at.isoformat())
                    await self.log_output("of video " + publishtime.isoformat() + " " +self.videoinfo["channel"]+" - " + self.videoinfo["title"] + " ["+self.videoid+"]")
                    if repeats >= 1:
                        await self.log_output("Recording the YouTube-archived chat after livestream finished")
                    self.httpclient = httpx.AsyncClient(http2=True)
                    self.running_chat = LiveChatAsync(self.videoid, callback = self.display, processor = (SuperChatLogProcessor(), SuperchatCalculator()),logger=self.logger, client = self.httpclient, exception_handler = self.exception_handling)
                    while self.running_chat.is_alive() and not self.cancelled:
                        await asyncio.sleep(3)
                    if type(self.running_chat.exception) is exceptions.InvalidVideoIdException or type(self.running_chat.exception) is exceptions.ChatParseException:
                        #Video ID invalid: Private or Membership vid or deleted. Treat as cancelled
                        #ChatParseException: No chat found
                        self.cancelled = True
                    if repeats == 0 and not self.chat_err and not self.cancelled and islive:
                        self.ended_at = datetime.now(tz=pytz.timezone('Europe/Berlin'))
                        self.videoinfo["endedLogAt"] = self.ended_at.timestamp()
                    await self.httpclient.aclose()
                    newmetadata = await self.async_get_video_info(self.videoid) #when livestream chat parsing ends, get some more metadata
                    if newmetadata is not None:
                        if newmetadata["live"] in ["upcoming","live"]: #in case the livestream has not ended yet!
                            await self.log_output(("Error! Chat monitor ended prematurely!",self.running_chat.is_alive()))
                            self.chat_err = True
                    else:
                        islive = False
                    if self.videoinfo["caught_while"] in ["upcoming","live"]:
                        #use newer metadata while rescuing certain fields from the old metadata
                        createdDateTime = self.videoPostedAt
                        caught_while = self.videoinfo["caught_while"]
                        old_title = self.videoinfo["title"]
                        retries_w_scs = self.videoinfo["retries_of_rerecording_had_scs"]
                        retries_total = self.videoinfo["retries_of_rerecording"]
                        if newmetadata is not None:
                            self.videoinfo = newmetadata
                            self.videoinfo["endedLogAt"] = self.ended_at.timestamp() if self.ended_at else None
                            self.videoinfo["retries_of_rerecording_had_scs"] = retries_w_scs
                            self.videoinfo["retries_of_rerecording"] = retries_total
                            self.videoinfo["createdDateTime"] = createdDateTime
                            self.videoinfo["caught_while"] = caught_while
                            if self.videoinfo["title"] != old_title:
                                self.videoinfo["old_title"] = old_title
                        else:
                            await self.log_output(("couldn't retrieve new metadata for",self.videoid,old_title))
                    else:
                        islive = False
                    if self.msg_counter > 0 and not self.chat_err:
                        had_scs += 1
                        self.videoinfo["retries_of_rerecording_had_scs"] = had_scs
                        self.total_counted_msgs = 0
                        self.total_member_msgs = 0
                        self.total_new_members = 0
                    self.videoinfo["startedLogAt"] = self.started_at.timestamp()
                    self.videoinfo["retries_of_rerecording"] = repeats
                    await self.update_psql_metadata()
                    self.metadata_list.append(self.videoinfo)
                else:
                    await self.log_output(self.videoinfo["title"]+" is not a broadcast recording or premiere")
                    return
            repeats += 1
            await self.log_output((repeats,self.cancelled,had_scs,self.videoinfo["live"]))
            if repeats >= 1 and not self.cancelled and had_scs < 2 and islive:
                await self.log_output("Waiting "+str(self.minutes_wait)+" minutes before re-recording sc-logs")
                await asyncio.sleep(self.minutes_wait*60)
        self.running = False
        await self.log_output("writing to files")
        proper_sc_list = []
        unique_currency_donors={}
        count_scs = 0
        for msg in self.sc_msgs:
            msg_loaded = json.loads(msg)
            if msg_loaded["type"] not in ["newSponsor", "sponsorMessage"]:
                count_scs += 1
                donations = self.donors[msg_loaded["userid"]]["donations"].setdefault(msg_loaded["currency"],[0,0])
                self.donors[msg_loaded["userid"]]["donations"][msg_loaded["currency"]][0] = donations[0] + 1 #amount of donations
                self.donors[msg_loaded["userid"]]["donations"][msg_loaded["currency"]][1] = donations[1] + msg_loaded["value"] #total amount of money donated
                self.unique_donors.setdefault(msg_loaded["currency"], set())
                self.unique_donors[msg_loaded["currency"]].add(msg_loaded["userid"])
            proper_sc_list.append(msg_loaded)
        for currency in self.unique_donors.keys():
            unique_currency_donors[currency] = len(self.unique_donors[currency])
        f = open(self.sc_file, "w")
        f_stats = open(self.stats_file, "w")
        f.write(json.dumps(proper_sc_list))
        await self.log_output((len(proper_sc_list), "unique messages written",count_scs,"are superchats"))
        f.close()
        self.stats.append(await self.loop.run_in_executor(self.t_pool, recount_money, proper_sc_list))
        f_stats.write(json.dumps([self.metadata_list[-1], self.stats[-1], unique_currency_donors]))
        f_stats.close()
        f_donors = open(self.donor_file,"w")
        f_donors.write(json.dumps(self.donors))
        f_donors.close()
        await self.conn.close()
        if self.cancelled:
            os.rename(f.name, f.name+".cancelled")
            os.rename(f_stats.name, f_stats.name + ".cancelled")
            os.rename(f_donors.name, f_donors.name + ".cancelled")
        if not self.chat_err and self.gen_wc and len(self.sc_msgs) > 0 and repeats >= 1 and not self.cancelled:
            await self.loop.run_in_executor(self.t_pool, self.generate_wordcloud, proper_sc_list)

    async def display(self,data,amount):
        """Record one batch of chat items handed over by the chat listener.

        Nothing happens when ``data.items`` is empty. Otherwise each item is
        handled according to its ``type``:

        * ``"placeholder"`` only increments ``self.placeholders``.
        * ``"newSponsor"`` increments ``self.total_new_members`` and stores a
          JSON record in ``self.sc_msgs``.
        * ``"superChat"``, ``"superSticker"`` and ``"sponsorMessage"`` get
          their currency normalised, are registered in ``self.donors``,
          counted, stored as JSON in ``self.sc_msgs`` and queued as database
          rows.

        After the loop the queued rows are written inside a single
        transaction and a one-line summary is logged.

        Args:
            data: Object exposing ``items``, a sequence of chat items.
            amount: Mapping read for its ``"amount_sc"`` entry; it is also
                appended to ``self.stats`` once per paid/member message.
        """
        batch = data.items
        if len(batch) == 0:
            return
        start = datetime.now(timezone.utc)
        name_rows = []     # rows for self.channel_name_history
        channel_rows = []  # rows for self.insert_channels
        message_rows = []  # rows for self.insert_messages
        for item in batch:
            kind = item.type
            if kind == "placeholder":
                self.placeholders += 1
            if kind == "newSponsor":
                # item.timestamp is divided by 1000 before conversion, i.e. it
                # is treated as milliseconds since the epoch.
                joined_at = datetime.fromtimestamp(item.timestamp/1000.0,timezone.utc)
                member_record = {
                    "type": kind,
                    "id": item.id,
                    "time": item.timestamp,
                    "userid": item.author.channelId,
                    "member_level": item.member_level,
                    "debugtime": joined_at.isoformat(),
                }
                self.total_new_members += 1
                self.sc_msgs.add(json.dumps(member_record))
            if kind not in ("superChat", "superSticker", "sponsorMessage"):
                continue

            # Map currency aliases to their canonical spelling (on the item itself).
            if item.currency in self.clean_currency:
                item.currency = self.clean_currency[item.currency]
            sent_at = datetime.fromtimestamp(item.timestamp/1000.0,timezone.utc)
            # When the video's "live" state is "none", the name is stamped with
            # the time this batch was processed instead of the message time.
            name_seen_at = sent_at if self.videoinfo["live"] != "none" else start
            donor_name = item.author.name
            donor_id = item.author.channelId
            msg_id = item.id
            name_rows.append((donor_id, donor_name, sent_at, name_seen_at))
            channel_rows.append((donor_id, donor_name, False))

            # Remember every display name a donor has been seen with.
            donor_entry = self.donors.setdefault(donor_id, {"names": [], "donations": {}})
            if donor_name not in donor_entry["names"]:
                donor_entry["names"].append(donor_name)

            text = item.message
            colour = item.bgColor
            currency = item.currency.replace(u'\xa0', '')  # strip non-breaking spaces
            record = {
                "type": kind,
                "id": msg_id,
                "time": item.timestamp,
                "currency": currency,
                "value": item.amountValue,
                "weekday": sent_at.weekday(),
                "hour": sent_at.hour,
                "minute": sent_at.minute,
                "userid": donor_id,
                "message": text,
                "color": colour,
                "debugtime": sent_at.isoformat(),
            }
            if kind == "sponsorMessage":
                self.total_member_msgs += 1
                record["member_level"] = item.member_level
            else:
                self.total_counted_msgs += 1
            message_rows.append(
                (self.videoid, msg_id, donor_id, text, sent_at, currency,
                 Decimal(item.amountValue), colour))
            self.stats.append(amount)
            # self.sc_msgs holds JSON strings, so identical records collapse.
            self.sc_msgs.add(json.dumps(record))

        self.msg_counter = amount["amount_sc"]
        async with self.conn.transaction():
            await self.insert_channels.executemany(channel_rows)
            await self.channel_name_history.executemany(name_rows)
            await self.insert_messages.executemany(message_rows)
        end = datetime.now(timezone.utc)
        elapsed_ms = (end-start).total_seconds()*1000
        summary = "".join((
            self.videoinfo["channel"], " ", self.videoinfo["title"], " ",
            batch[-1].elapsedTime, " ",
            str(self.msg_counter), "/", str(self.total_counted_msgs),
            " superchats, ", str(self.total_new_members), " new members, ",
            str(self.total_member_msgs), " member anniversary scs took ",
            str(elapsed_ms), " ms, placeholders: ", str(self.placeholders),
        ))
        await self.log_output(summary)

    def generate_wordcloud(self,log):
        """Build a ``superchat_wordcloud`` for ``log`` and run its ``generate()``.

        The word cloud is constructed with ``logname`` set to this video's id.
        """
        superchat_wordcloud(log, logname=self.videoid).generate()

    async def log_output(self,logmsg,level = 20):
        """Format ``logmsg`` and hand it to ``self.logger.log`` on the executor.

        Args:
            logmsg: A tuple (its parts are stringified and joined with single
                spaces), a string (logged as-is), or any other object
                (logged as ``str(logmsg)``).
            level: Numeric level passed to ``self.logger.log``; the default
                20 equals ``logging.INFO``.
        """
        # The type is checked before anything else touches ``logmsg``: calling
        # len() up front raised TypeError for non-sized values such as ints
        # or None, which the generic fallback below is meant to handle.
        if isinstance(logmsg, tuple):
            msg_string = " ".join(str(msg_part) for msg_part in logmsg)
        elif isinstance(logmsg, str):
            msg_string = logmsg
        else:
            msg_string = str(logmsg)
        # The logger call is pushed to self.t_pool rather than run inline in
        # the coroutine.
        await self.loop.run_in_executor(self.t_pool,self.logger.log,level,msg_string)
        
    def exception_handling(self,loop,context):
        """Log an exception context at level 40 (``logging.ERROR``).

        Args:
            loop: Accepted but not used.
            context: Logged as-is after a fixed "Exception caught" line.
                NOTE(review): the ``(loop, context)`` signature looks like an
                asyncio loop exception handler, in which case ``context`` is
                a dict - confirm against the caller.
        """
        self.logger.log(40,"Exception caught")
        self.logger.log(40,context)
예제 #7
0
def get_channel_name(video_id, api_key):
    """Return the ``"channel_title"`` field of the metadata for ``video_id``.

    A new ``YouTubeDataAPI`` client is created from ``api_key`` on every call.
    """
    metadata = YouTubeDataAPI(api_key).get_video_metadata(video_id)
    return metadata["channel_title"]
예제 #8
0
def get_video_title(video_id, api_key):
    """Return the ``"video_title"`` field of the metadata for ``video_id``.

    A new ``YouTubeDataAPI`` client is created from ``api_key`` on every call.
    """
    metadata = YouTubeDataAPI(api_key).get_video_metadata(video_id)
    return metadata["video_title"]
예제 #9
0
        def convert_time(row):
            """Format ``row['publish_date']`` as a local-time 'YYYY-MM-DD HH:MM:SS' string.

            The value is passed to ``time.localtime``, i.e. it is treated as
            seconds since the epoch.
            """
            published = time.localtime(row['publish_date'])
            return time.strftime('%Y-%m-%d %H:%M:%S', published)

        # NOTE(review): if videos_list is a pandas DataFrame, reset_index()
        # returns a new frame by default and the result is discarded here, so
        # this line leaves videos_list unchanged - confirm whether an
        # assignment or inplace=True was intended.
        videos_list.reset_index()

        # Convert each row's epoch 'publish_date' to a formatted timestamp
        # string (row-wise apply of convert_time).
        videos_list['time_pub'] = videos_list.apply(convert_time, axis=1)

        # Extract the video IDs to a list
        videos = videos_list['video_id'].tolist()

        # Fetch the metadata for all of those videos and load it into a dataframe
        query = yt.get_video_metadata(videos)
        vids = pd.DataFrame(query)

        # Sum columns to work out totals; to_numeric is applied first
        # (presumably because the counts arrive as strings - TODO confirm).
        likes = pd.to_numeric(vids['video_like_count']).sum()
        dislikes = pd.to_numeric(vids['video_dislike_count']).sum()
        comments = pd.to_numeric(vids['video_comment_count']).sum()

        # Apply those totals to columns in our original channel df
        channel['likes'] = likes
        channel['comments'] = comments
        channel['dislikes'] = dislikes

        #restrict the output to only useful columns
        channel = channel[[
            'description', 'video_count', 'view_count', 'subscription_count',
예제 #10
0
                                                    parser=lambda x: x):
                v["minerva_collected"] = time.time()
                out_file.write("%s\n" % json.dumps(v))
                out_file.flush()

                video_ids.append(v["snippet"]["resourceId"]["videoId"])
                video_count += 1
    print("\t Captured playlist videos:", video_count)

    # Now we test our cache to see what videos we've already pulled:
    # a video counts as cached when "<video id>.json" exists in target_path.
    video_ids_to_capture = []
    for v in video_ids:
        this_v_file = os.path.join(target_path, "%s.json" % v)
        if not os.path.exists(this_v_file):
            video_ids_to_capture.append(v)

    # For all videos in this playlist that are not cached yet, download
    # their metadata in batches of slice_size IDs per request.
    # NOTE(review): 50 presumably matches the API's per-request ID limit - confirm.
    slice_size = 50
    for i in range(0, len(video_ids_to_capture), slice_size):
        v_ids = video_ids_to_capture[i:i + slice_size]
        # The identity parser hands each item back as received instead of a
        # parsed/flattened form (the code below reads the raw "id" key).
        vid_data = yt.get_video_metadata(
            v_ids,
            part=['statistics', 'snippet', 'contentDetails'],
            parser=lambda x: x)

        for v in vid_data:
            # Stamp the record with the collection time (epoch seconds) and
            # write it to its own "<id>.json" file, overwriting any existing one.
            v["minerva_collected"] = time.time()
            this_v_file = os.path.join(target_path, "%s.json" % v["id"])
            with open(this_v_file, "w") as out_file:
                out_file.write("%s" % json.dumps(v))
예제 #11
0
import pandas as pd
from youtube_api import YouTubeDataAPI
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# VADER sentiment analyser; used below to score comment text.
analyser = SentimentIntensityAnalyzer()

# NOTE(review): the API key is hard-coded in source. Prefer loading it from
# the environment or a config file kept out of version control, and rotate
# this key if it is real.
api_key = 'AIzaSyBR2kc8R5EzD1rnOjyXZfEL1FOGLKojsg4'
yt = YouTubeDataAPI(api_key)

# Video resource parts requested from get_video_metadata below
video_parts = ['statistics', 'snippet', 'contentDetails', 'topicDetails']

#check api key is valid
if yt.verify_key():
    sonic = yt.get_video_metadata('szby7ZHLnkA', parser=None, part=video_parts)

    sonic_comments = yt.get_video_comments('szby7ZHLnkA', max_results=100)
    df_comments = pd.DataFrame(sonic_comments)

    df_graph_data = pd.DataFrame(columns=[
        'comment_id', 'commenter_channel_id', 'channel_country', 'text',
        'date', 'neg', 'neu', 'pos', 'compound'
    ])

    for index, row in df_comments.iterrows():
        channel_id = df_comments.iloc[0].commenter_channel_id
        channel_data = yt.get_channel_metadata(channel_id)

        score = analyser.polarity_scores(row['text'])
        graph_row = {
            'comment_id': row['comment_id'],