class ReadOnlyXMPPBot(slixmpp.ClientXMPP):
    """
    XMPP client that joins a multi-user chatroom and forwards what it sees
    (connection state changes and groupchat messages) over `ZmqMessaging`.
    """

    def __init__(self, jid, password, room, socket_address, service_name,
                 bot_nick='EchoBot', **kwargs):
        """
        `jid` and `password` are passed straight to the slixmpp parent class

        `room` is the chatroom joined once the session has started

        `socket_address` and `service_name` are used to build the
        `ZmqMessaging` instance that every event is reported to

        `bot_nick` is the nickname used inside the chatroom

        Any extra keyword arguments are accepted and ignored.
        """
        super().__init__(jid, password)

        self.messaging = ZmqMessaging(service_name, socket_address)
        self.log = logging.getLogger(__file__)
        self.room, self.nick = room, bot_nick

        # Plugins are registered before any handler is attached
        self._register_plugin_helper()

        event_handlers = (
            ("session_start", self.start),
            ("groupchat_message", self.muc_message),
            ('connected', self._connected),
            ('disconnected', self._disconnected),
        )
        for event_name, handler in event_handlers:
            self.add_event_handler(event_name, handler)

    def _connected(self, *args):
        """Report that the connection has been established."""
        self.messaging.send_message('CONNECTED')

    def _disconnected(self, *args):
        """Report that the connection has been lost."""
        self.messaging.send_message('DISCONNECTED')

    def process(self):
        """Initialise the registered plugins, then run the parent `process`."""
        self.init_plugins()
        super().process()

    def _register_plugin_helper(self):
        """
        One-shot helper method used to register all the plugins
        """
        plugin_names = (
            'xep_0030',  # Service Discovery
            'xep_0199',  # XMPP Ping
            'xep_0045',  # Multiple User Chatroom
        )
        for plugin_name in plugin_names:
            self.register_plugin(plugin_name)

    def start(self, event):
        """
        `session_start` handler: announce presence, join the chatroom under
        the configured nickname and then request the roster.
        """
        self.log.info('starting xmpp')
        self.send_presence()
        muc_plugin = self.plugin['xep_0045']
        muc_plugin.joinMUC(self.room, self.nick, wait=True)
        self.get_roster()

    def muc_message(self, msg):
        """
        `groupchat_message` handler: forward the sender nickname and the
        message body as a `MSG` message.
        """
        sender, body = msg['mucnick'], msg['body']
        self.messaging.send_message('MSG', sender, body)
def main(client_secret_filepath, socket_address):
    """
    Relay the live chat of the authenticated account's YouTube broadcast.

    The function authenticates with `youtube_authentication`, looks up the
    first broadcast returned for the account, sends `CONNECTED` and then
    polls the broadcast's live chat forever, forwarding every message that
    has display content as `('MSG', author, message)`.

    `client_secret_filepath` is handed to `youtube_authentication`

    `socket_address` is handed to `ZmqMessaging`

    Raises `IndexError` when the account has no broadcast to read from.
    Any error raised while polling propagates to the caller, but only after
    `DISCONNECTED` has been sent.
    """
    messaging = ZmqMessaging('youtube', socket_address)
    scope = [
        'https://www.googleapis.com/auth/youtube',
        'https://www.googleapis.com/auth/youtube.force-ssl',
        'https://www.googleapis.com/auth/youtube.readonly',
    ]
    youtube_api = youtube_authentication(client_secret_filepath, scope)

    livestream_response = youtube_api.liveBroadcasts().list(
        mine=True, part='snippet', maxResults=1).execute()
    broadcasts = livestream_response.get('items') or []
    if not broadcasts:
        # Same exception type the bare `[0]` lookup raised on an empty
        # list, but with a message that says what actually went wrong.
        raise IndexError('no live broadcast found for this account')
    live_chat_id = broadcasts[0]['snippet']['liveChatId']

    # The first request is only used to obtain the page token and the
    # polling interval; its items are not forwarded.
    livechat_response = youtube_api.liveChatMessages().list(
        liveChatId=live_chat_id, part='snippet').execute()
    next_token = livechat_response.get('nextPageToken')
    polling_interval = _convert_to_seconds(
        livechat_response.get('pollingIntervalMillis'))

    # Loop invariant, so built once. `part` is a comma-separated list of
    # resource parts; written without the embedded space.
    part = 'snippet,authorDetails'

    messaging.send_message('CONNECTED')
    try:
        while True:
            sleep(polling_interval)

            livechat_response = youtube_api.liveChatMessages().list(
                liveChatId=live_chat_id,
                part=part,
                pageToken=next_token).execute()
            next_token = livechat_response.get('nextPageToken')
            polling_interval = _convert_to_seconds(
                livechat_response.get('pollingIntervalMillis'))

            # A page without `items` is treated as an empty page
            for live_chat_message in livechat_response.get('items') or []:
                snippet = live_chat_message['snippet']
                if not snippet['hasDisplayContent']:
                    continue
                message = snippet['displayMessage']
                author = live_chat_message['authorDetails']['displayName']
                messaging.send_message('MSG', author, message)
    finally:
        # The loop above only ends through an exception (including
        # KeyboardInterrupt), so this is the one place the disconnect
        # notification can actually be delivered.
        messaging.send_message('DISCONNECTED')
def main(client_secret_filepath, socket_address):
    """
    Relay the live chat of the authenticated account's YouTube broadcast.

    The function authenticates with `youtube_authentication`, looks up the
    first broadcast returned for the account, sends `CONNECTED` and then
    polls the broadcast's live chat forever, forwarding every message that
    has display content as `('MSG', author, message)`.

    `client_secret_filepath` is handed to `youtube_authentication`

    `socket_address` is handed to `ZmqMessaging`

    Raises `IndexError` when the account has no broadcast to read from.
    Any error raised while polling propagates to the caller, but only after
    `DISCONNECTED` has been sent.
    """
    messaging = ZmqMessaging('youtube', socket_address)
    scope = [
        'https://www.googleapis.com/auth/youtube',
        'https://www.googleapis.com/auth/youtube.force-ssl',
        'https://www.googleapis.com/auth/youtube.readonly',
    ]
    youtube_api = youtube_authentication(client_secret_filepath, scope)

    livestream_response = youtube_api.liveBroadcasts().list(
        mine=True, part='snippet', maxResults=1).execute()
    broadcasts = livestream_response.get('items') or []
    if not broadcasts:
        # Same exception type the bare `[0]` lookup raised on an empty
        # list, but with a message that says what actually went wrong.
        raise IndexError('no live broadcast found for this account')
    live_chat_id = broadcasts[0]['snippet']['liveChatId']

    # The first request is only used to obtain the page token and the
    # polling interval; its items are not forwarded.
    livechat_response = youtube_api.liveChatMessages().list(
        liveChatId=live_chat_id, part='snippet').execute()
    next_token = livechat_response.get('nextPageToken')
    polling_interval = _convert_to_seconds(
        livechat_response.get('pollingIntervalMillis'))

    # Loop invariant, so built once. `part` is a comma-separated list of
    # resource parts; written without the embedded space.
    part = 'snippet,authorDetails'

    messaging.send_message('CONNECTED')
    try:
        while True:
            sleep(polling_interval)

            livechat_response = youtube_api.liveChatMessages().list(
                liveChatId=live_chat_id,
                part=part,
                pageToken=next_token).execute()
            next_token = livechat_response.get('nextPageToken')
            polling_interval = _convert_to_seconds(
                livechat_response.get('pollingIntervalMillis'))

            # A page without `items` is treated as an empty page
            for live_chat_message in livechat_response.get('items') or []:
                snippet = live_chat_message['snippet']
                if not snippet['hasDisplayContent']:
                    continue
                message = snippet['displayMessage']
                author = live_chat_message['authorDetails']['displayName']
                messaging.send_message('MSG', author, message)
    finally:
        # The loop above only ends through an exception (including
        # KeyboardInterrupt), so this is the one place the disconnect
        # notification can actually be delivered.
        messaging.send_message('DISCONNECTED')
class ReadOnlyWebSocket(websocket.WebSocketApp):
    """
    Read-only chat client speaking colon-delimited Socket.IO packets
    (`type:callback:namespace:data`) over a websocket, forwarding chat
    messages and connection state over `ZmqMessaging`.
    """

    # Packet type ids used in the first field of every packet. The values
    # for CONNECT, HEARTBEAT and EVENT match the literals this class has
    # always handled in `on_message`; the rest follow the same numbering
    # scheme. NOTE(review): confirm against the server's Socket.IO version.
    TYPE_KEYS = {
        'DISCONNECT': 0,
        'CONNECT': 1,
        'HEARTBEAT': 2,
        'MESSAGE': 3,
        'JSON': 4,
        'EVENT': 5,
        'ACK': 6,
        'ERROR': 7,
        'NOOP': 8,
    }

    def __init__(self, streamer_name, namespace, website_url,
                 socket_address, service_name):
        """
        `streamer_name` is sent as the argument of the `join` event

        `namespace` is the endpoint written into every outgoing packet

        `website_url` is POSTed to for the handshake key and, with its
        scheme switched to websocket, used as the base of the socket url

        `socket_address` and `service_name` are handed to `ZmqMessaging`
        """
        self.log = logging.getLogger(__name__)
        self.log.setLevel(0)
        self.messaging = ZmqMessaging(service_name, socket_address)
        self._streamer_name = streamer_name
        self.namespace = namespace
        self._website_url = website_url

        self.log.info('Getting Socket IO key!')
        self.key, _ = self._connect_to_server_helper()
        self.log.info('Socket IO key got!')

        # alters URL to be more websocket...ie; only the scheme is touched
        # so an 'http' appearing later in the url is left alone
        self._website_socket = self._website_url.replace('http', 'ws', 1)
        self._website_socket += 'websocket/'
        super().__init__(self._website_socket + self.key,
                         on_open=self.on_open,
                         on_close=self.on_close,
                         on_message=self.on_message,
                         on_error=self.on_error)

    def repeat_run_forever(self):
        """
        Run the websocket, and whenever it stops wait 3 seconds, report
        `DISCONNECTED`, fetch a fresh handshake key and connect again.
        Only a `KeyboardInterrupt` raised by `run_forever` ends the loop.
        """
        while True:
            try:
                self.run_forever()
            except KeyboardInterrupt:
                break
            except Exception as e:
                self.log.info('Socket IO errors: {}'.format(e))
            sleep(3)
            self.messaging.send_message('DISCONNECTED')
            try:
                key, _ = self._connect_to_server_helper()
            except Exception as e:
                # A failed re-handshake must not end the reconnect loop;
                # the next pass waits and tries again.
                self.log.info('Socket IO errors: {}'.format(e))
                continue
            self.url = self._website_socket + key

    def _connect_to_server_helper(self):
        """
        POST to the website url and parse the colon-separated handshake
        reply. Returns `(key, heartbeat_timeout)` with the timeout as int.
        Raises `ValueError` if the reply does not have four fields.
        """
        r = requests.post(self._website_url)
        params = r.text
        # unused variables are connection_timeout and supported_formats
        key, heartbeat_timeout, _, _ = params.split(':')
        return key, int(heartbeat_timeout)

    def on_open(self, *args):
        """Log that the websocket has been opened."""
        self.log.info('Websocket open!')

    def on_close(self, *args):
        """Log that the websocket has been closed."""
        self.log.info('Websocket closed :(')

    def on_message(self, *args):
        """
        Handle one incoming packet: answer connect and heartbeat packets,
        join the streamer's channel once the namespace is connected, and
        forward `message` events as `('MSG', sender, text)`.
        """
        # The packet text is taken as the last positional argument so the
        # handler works whether or not the app instance is passed first.
        # NOTE(review): calling convention differs between websocket-client
        # releases - confirm for the pinned version.
        raw = args[-1]
        fields = raw.split(':', 3)
        try:
            key = int(fields[0])
        except ValueError:
            self.log.info('Ignoring malformed packet: {}'.format(raw))
            return
        # namespace = fields[2]
        payload = fields[3] if len(fields) >= 4 else ''

        type_keys = self.TYPE_KEYS
        if key == type_keys['CONNECT']:
            if raw == '1::':
                # Socket-level connect: answer with a connect packet
                self.send_packet_helper(type_keys['CONNECT'])
            elif raw == '1::{}'.format(self.namespace):
                # Namespace-level connect: initialise, then join the channel
                self.send_packet_helper(type_keys['EVENT'],
                                        data={'name': 'initialize'})
                join = {'name': 'join',
                        'args': ['{}'.format(self._streamer_name)]}
                self.send_packet_helper(type_keys['EVENT'], data=join)
                self.log.info('Connected to channel with socket io!')
                self.messaging.send_message('CONNECTED')
        elif key == type_keys['HEARTBEAT']:
            # Echo the heartbeat back
            self.send_packet_helper(type_keys['HEARTBEAT'])
        elif key == type_keys['EVENT']:
            event = json.loads(payload)
            if event.get('name') == 'message':
                chat = event['args'][0]
                sender = html.unescape(chat['sender'])
                text = html.unescape(chat['text'])
                self.messaging.send_message('MSG', sender, text)

    def on_error(self, *args):
        """Log the error reported by the websocket library."""
        self.log.error('Websocket error: %s', args[-1])

    def disconnect(self):
        """Send a disconnect packet for this client's namespace."""
        # '0::namespace:'
        self.send_packet_helper(self.TYPE_KEYS['DISCONNECT'])

    def send_packet_helper(self, type_key, data=None):
        """
        Send one `type:callback:namespace:data` packet.

        `type_key` is the numeric packet type

        `data` is JSON-encoded when given, otherwise the field is empty
        """
        encoded = '' if data is None else json.dumps(data)
        # NOTE: callbacks currently not implemented
        callback = ''
        message = ':'.join([str(type_key), callback, self.namespace, encoded])
        self.send(message)
class JavascriptWebscraper:
    """
    Scrapes a javascript-rendered comment list with a PhantomJS webdriver
    and forwards every new comment over `ZmqMessaging`.
    """

    def __init__(self, url=None, comment_element_id=None,
                 author_class_name=None, message_class_name=None,
                 socket_address='', service_name=''):
        """
        `url` is the page loaded by the webdriver

        `comment_element_id` is the css element where all the comments are,
        i.e., 'all-comments' for youtube

        `author_class_name` is the css class which holds the comment author
        username i.e., 'yt-user-name' for youtube

        `message_class_name` is the css class which holds the comment text
        ie., 'comment-text' for youtube

        `socket_address` and `service_name` are handed to `ZmqMessaging`

        Installs SIGINT and SIGTERM handlers that shut the scraper down.
        """
        self.messaging = ZmqMessaging(service_name, socket_address)
        self.log = logging.getLogger(__name__)
        self.log.setLevel(logging.NOTSET)

        self.url = url
        self._number_of_messages = 0

        self.comment_element_id = comment_element_id
        self.author_class_name = author_class_name
        self.message_class_name = message_class_name
        self._driver = None
        self._kill = False
        signal.signal(signal.SIGINT, self._exit_gracefully)
        signal.signal(signal.SIGTERM, self._exit_gracefully)

    def _exit_gracefully(self, *args, **kwargs):
        """
        Signal handler: flag the scraper as killed and quit the webdriver.
        """
        # Set the flag first so a failing `quit()` cannot lose the request
        self._kill = True
        if self._driver is not None:
            self._driver.quit()

    def run_forever(self):
        """
        Keep (re)starting `run` until the scraper is killed or the page
        does not contain the configured elements.
        """
        while not self._kill:
            try:
                self.log.info('Starting javascript scraper!')
                self.run()
            except selenium.common.exceptions.NoSuchElementException:
                self.log.error('Youtube parameters wrong, shutting down :(')
                break
            except Exception:
                if self._kill:
                    break
                # `exception` already records the active traceback; passing
                # the exception as a format argument breaks the log record
                self.log.exception('Javascript error!')

    def run(self):
        """
        Load the page, send `CONNECTED`, then poll the comment list once a
        second and forward each newly appeared comment as
        `('MSG', author, message)`. `DISCONNECTED` is sent when polling
        stops, whether through the kill flag or an exception.
        """
        if self._driver:
            self._driver.quit()
            self._driver = None
        self.log.info('starting up phantom javascript!')
        self._driver = webdriver.PhantomJS()
        # TODO: see if this is needed or not
        self._driver.set_window_size(1000, 1000)
        self._driver.get(self.url)

        # NOTE: need some time for comments to load
        self.log.info('youtube sleeping for 5 seconds!')
        sleep(5)
        self.log.info('youtube done sleeping')

        all_comments = self._driver.find_element_by_id(self.comment_element_id)
        # NOTE: make sure this is ok if using for anything other than youtube
        comments = all_comments.find_elements_by_tag_name('li')
        # Comments already on the page are counted but not forwarded
        self._number_of_messages = len(comments)
        self.messaging.send_message('CONNECTED')

        try:
            while not self._kill:
                sleep(1)
                comments = all_comments.find_elements_by_tag_name('li')
                if len(comments) <= self._number_of_messages:
                    continue

                # Everything past the previously seen count is new
                new_comments = comments[self._number_of_messages:]
                self._number_of_messages = len(comments)
                for comment in new_comments:
                    find_elem = comment.find_element_by_class_name
                    author = find_elem(self.author_class_name).text
                    message = find_elem(self.message_class_name).text
                    self.messaging.send_message('MSG', author, message)
        finally:
            self.messaging.send_message('DISCONNECTED')