def getClips(self, auth: TwitchAuth, user_id: int = 0, clip_id: str = "", game_id: int = 0) -> List[TwitchClip]:
    uri = f"{self.apiUri}/clips"
    if user_id != 0:
        uri = f"{uri}?broadcaster_id={user_id}"
    elif clip_id != "":
        uri = f"{uri}?id={clip_id}"
    elif game_id != 0:
        uri = f"{uri}?game_id={game_id}"
    else:
        Logger().error(
            "Clips were requested but invalid parameters were given; returning an empty list."
        )
        return list()

    res = get(uri, headers=self.__header__(auth))
    clips: List[TwitchClip] = list()
    if res.status_code != 200:
        Logger().error(
            f"Clips request returned a bad status_code. Code: {res.status_code}, Error: {res.text}"
        )
        return clips
    else:
        json = loads(res.content)
        for i in json["data"]:
            clips.append(TwitchClip(i))
        return clips
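# A minimal usage sketch for getClips (assumes the TwitchAPI wrapper class that
# hosts this method, as used by TwitchReader further down; the broadcaster id
# is a placeholder and only one filter is honored per call):
api = TwitchAPI()
auth = api.auth()
clips = api.getClips(auth, user_id=44445592)  # placeholder broadcaster id
for clip in clips:
    print(f"{clip.title} -> {clip.url}")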
class Worker():
    """
    This is a generic worker that will contain the source it will monitor.
    """

    def __init__(self, source: ISources):
        self.logger = Logger(__class__)
        self.source: ISources = source
        self.enabled: bool = False

    def check(self) -> bool:
        if len(self.source.links) >= 1:
            self.enabled = True
        else:
            self.enabled = False
            self.logger.info(
                f"{self.source.siteName} was not enabled. Thread will exit.")
        return self.enabled

    def init(self) -> None:
        """
        This is the entry point for the worker. Once it is turned on, it will
        check the Source for new items.
        """
        if self.source.sourceEnabled == True:
            self.logger.debug(f"{self.source.siteName} Worker has started.")
            while True:
                news = self.source.getArticles()

                # Check the DB to see if the article has already been posted.
                for i in news:
                    exists = i.exists()
                    if exists == False:
                        i.add()
                        if len(self.source.hooks) >= 1:
                            dq = DiscordQueue()
                            dq.convert(i)
                            res = dq.add()
                            self.discordQueueMessage(i, res)

                self.logger.debug(
                    f"{self.source.siteName} Worker is going to sleep.")
                sleep(env.threadSleepTimer)

    def discordQueueMessage(self, i: Articles, added: bool) -> None:
        msg: str = i.title if i.title != '' else i.description
        if added == True:
            self.logger.info(f"{msg} was added to the Discord queue.")
        else:
            self.logger.error(f"{msg} was not added to the Discord queue.")
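# A minimal wiring sketch for Worker (a sketch, assuming the TwitchReader
# defined below is configured via its NEWSBOT_* environment/database entries):
worker = Worker(source=TwitchReader())
worker.check()
if worker.enabled:
    worker.init()  # Blocks: getArticles() poll loop with sleep between passes.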
def __init__(self, url: str, siteName: str) -> None:
    self.logger = Logger(__class__)
    self.url: str = url
    self.siteName: str = siteName
    self.content: RequestSiteContent = RequestContent(url=url)
    self.content.getPageDetails()
    # self.rssHelper: IRssContent = rssHelper
def __init__(self) -> None:
    self.logger = Logger(__class__)
    self.uri: str = "https://pso2.com/news"
    self.siteName: str = "Phantasy Star Online 2"
    self.authorName: str = f"{self.siteName} Official Site"
    self.links = list()
    self.hooks = list()
    self.sourceEnabled: bool = False
    self.outputDiscord: bool = False
    self.checkEnv(self.siteName)
def __init__(self) -> None:
    self.logger = Logger(__class__)
    self.uri = "https://example.net/"
    self.siteName: str = "RSS"
    self.feedName: str = ""
    self.RssHelper: IRssContent = IRssContent()
    self.links: List[Sources] = list()
    self.hooks: List[DiscordWebHooks] = list()
    self.sourceEnabled: bool = False
    self.outputDiscord: bool = False
    self.checkEnv(self.siteName)
def __init__(self) -> None:
    self.logger = Logger(__class__)
    self.uri = "https://www.instagram.com/"
    self.baseUri = self.uri
    self.siteName: str = "Instagram"
    self.links: List[Sources] = list()
    self.hooks: List[DiscordWebHooks] = list()
    self.sourceEnabled: bool = False
    self.outputDiscord: bool = False
    self.currentLink: Sources = Sources()
    self.checkEnv(self.siteName)
def __init__(self) -> None:
    self.logger = Logger(__class__)
    self.uri: str = "https://youtube.com"
    self.siteName: str = "Youtube"
    self.feedBase: str = "https://www.youtube.com/feeds/videos.xml?channel_id="
    self.links: List[Sources] = list()
    self.hooks: List[DiscordWebHooks] = list()
    self.sourceEnabled: bool = False
    self.outputDiscord: bool = False
    self.checkEnv(self.siteName)
def getUser(self, auth: TwitchAuth, username: str) -> TwitchUser:
    uri = f"{self.apiUri}/users?login={username}"
    res = get(uri, headers=self.__header__(auth))
    if res.status_code != 200:
        Logger().error(
            f"Failed to get user information. StatusCode: {res.status_code}, Error: {res.text}"
        )
        user = TwitchUser({})
    else:
        json = loads(res.content)
        if len(json["data"]) == 1:
            user = TwitchUser(json["data"][0])
        else:
            Logger().error("Did not get a usable object.")
            user = TwitchUser({})
    return user
def getStreams(self, auth: TwitchAuth, game_id: int = 0, user_id: int = 0, user_login: str = "") -> List[TwitchStream]:
    uri = f"{self.apiUri}/streams"
    if game_id != 0:
        uri = f"{uri}?game_id={game_id}"
    elif user_id != 0:
        uri = f"{uri}?user_id={user_id}"
    elif user_login != "":
        uri = f"{uri}?user_login={user_login}"

    res = get(uri, headers=self.__header__(auth))
    streams: List[TwitchStream] = list()
    if res.status_code != 200:
        Logger().error(
            f"Streams request returned a bad status_code. Code: {res.status_code}, Error: {res.text}"
        )
        return streams
    else:
        json = loads(res.content)
        if len(json["data"]) == 0:
            streams.append(TwitchStream())
        for i in json["data"]:
            streams.append(TwitchStream(i))
        return streams
def searchForUser(self, auth: TwitchAuth, username: str = "") -> List[TwitchChannel]:
    results: List[TwitchChannel] = list()
    if username == "":
        Logger().error("A user search was requested but no username was given.")
        return results

    uri: str = f"{self.apiUri}/search/channels?query={username}"
    header = self.__header__(auth)
    res = get(uri, headers=header)
    if res.status_code != 200:
        Logger().error(
            f"Attempted to pull user information but failed. status_code: {res.status_code}, output: {res.text}"
        )
    else:
        j = loads(res.content)
        for i in j["data"]:
            # Convert the JSON entry to an object.
            stream = TwitchChannel(i)
            # Get the game details
            # stream.game_data = self.getGame(auth, stream.game_id)
            # video = self.getVideos(auth=auth, user_id=stream.id)
            results.append(stream)
    return results
class BChrome():
    """
    This class helps to interact with Chrome/Selenium.
    It was made to be used as a base class for the sources that need Chrome.
    """

    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri: str = ""
        self.driver = self.driverStart()

    def driverStart(self) -> Chrome:
        options = ChromeOptions()
        options.add_argument("--disable-extensions")
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        try:
            driver = Chrome(options=options)
            return driver
        except Exception as e:
            self.logger.critical(f"Chrome Driver failed to start! Error: {e}")

    def driverGetContent(self) -> str:
        try:
            return self.driver.page_source
        except Exception as e:
            self.logger.critical(f"Failed to collect data from {self.uri}. {e}")

    def driverGoTo(self, uri: str) -> None:
        try:
            self.driver.get(uri)
            self.driver.implicitly_wait(10)
        except Exception as e:
            self.logger.error(f"Driver failed to get {uri}. Error: {e}")

    def driverClose(self) -> None:
        try:
            self.driver.close()
        except Exception as e:
            self.logger.error(f"Driver failed to close. Error: {e}")
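# A sketch of the expected BChrome lifecycle, mirroring how RedditReader below
# uses it (requires a local Chrome/chromedriver install; the URL is a placeholder):
crawler = BChrome()
crawler.driverGoTo("https://example.net/")
html = crawler.driverGetContent()
crawler.driverClose()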
def getGame(self, auth: TwitchAuth, game_id: int) -> TwitchGameData:
    uri = f"{self.apiUri}/games?id={game_id}"
    headers = self.__header__(auth)
    res = get(uri, headers=headers)
    if res.status_code != 200:
        Logger().error(
            f"Attempted to get Twitch game data but failed on game_id: {game_id}. output: {res.text}"
        )
        return TwitchGameData()
    else:
        j = loads(res.content)
        if len(j["data"]) != 0:
            game = TwitchGameData(j["data"][0])
        else:
            game = TwitchGameData()
        return game
def auth(self) -> TwitchAuth:
    """
    https://dev.twitch.tv/docs/authentication/getting-tokens-oauth#oauth-client-credentials-flow
    """
    client_id = str(getenv("NEWSBOT_TWITCH_CLIENT_ID"))
    client_secret = str(getenv("NEWSBOT_TWITCH_CLIENT_SECRET"))
    scopes = "user:read:email"
    # The OAuth2 token endpoint expects the parameter name "scope".
    uri = (
        f"https://id.twitch.tv/oauth2/token?client_id={client_id}"
        f"&client_secret={client_secret}&grant_type=client_credentials"
        f"&scope={scopes}"
    )
    res = post(uri)
    if res.status_code != 200:
        Logger().error(res.text)
        return TwitchAuth()
    else:
        token = loads(res.content)
        o = TwitchAuth(
            access_token=token["access_token"],
            expires_in=token["expires_in"],
            token_type=token["token_type"],
            client_id=client_id,
        )
        return o
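# auth() reads its credentials from the environment; a minimal setup sketch
# (placeholder values, and TwitchAPI is assumed to be the wrapper class used
# by TwitchReader below):
from os import environ
environ["NEWSBOT_TWITCH_CLIENT_ID"] = "your-client-id"
environ["NEWSBOT_TWITCH_CLIENT_SECRET"] = "your-client-secret"
token = TwitchAPI().auth()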
def getVideos(self, auth: TwitchAuth, id: int = 0, user_id: int = 0, game_id: int = 0) -> List[TwitchVideo]:
    uri = f"{self.apiUri}/videos"
    if id != 0:
        uri = f"{uri}?id={id}"
    elif user_id != 0:
        uri = f"{uri}?user_id={user_id}"
    elif game_id != 0:
        uri = f"{uri}?game_id={game_id}"

    res = get(uri, headers=self.__header__(auth))
    videos: List[TwitchVideo] = list()
    if res.status_code != 200:
        Logger().error(
            f"Videos request returned a bad status_code. Code: {res.status_code}, Error: {res.text}"
        )
        return videos
    else:
        json = loads(res.content)
        for i in json["data"]:
            videos.append(TwitchVideo(i))
        return videos
class BSources():
    """
    This class contains some common code found in the sources.
    Do not use this on its own!
    """

    def __init__(self) -> None:
        self.uri: str = ""
        self.logger = Logger(__class__)
        self.outputDiscord: bool = False
        self.hooks: List[DiscordWebHooks] = list()
        self.sourceEnabled: bool = False
        self.links: List[Sources] = list()

    def checkEnv(self, siteName: str) -> None:
        # Check if the site was requested.
        self.outputDiscord = self.isDiscordEnabled(siteName)
        if self.outputDiscord == True:
            self.hooks = self.getDiscordList(siteName)

        self.sourceEnabled = self.isSourceEnabled(siteName)
        if self.sourceEnabled == True:
            self.links = self.getSourceList(siteName)

    def getSourceList(self, siteName: str) -> List[Sources]:
        l = list()
        res = Sources(name=siteName).findAllByName()
        for i in res:
            l.append(i)
        return l

    def isSourceEnabled(self, siteName: str) -> bool:
        res = Sources(name=siteName).findAllByName()
        if len(res) >= 1:
            return True
        else:
            return False

    def getDiscordList(self, siteName: str) -> List[DiscordWebHooks]:
        h = list()
        dwh = DiscordWebHooks(name=siteName).findAllByName()
        if len(dwh) >= 1:
            for i in dwh:
                h.append(i)
        return h

    def isDiscordEnabled(self, siteName: str) -> bool:
        dwh = DiscordWebHooks(name=siteName).findAllByName()
        if len(dwh) >= 1:
            return True
        else:
            return False

    def getContent(self) -> Response:
        try:
            headers = self.getHeaders()
            return get(self.uri, headers=headers)
        except Exception as e:
            self.logger.critical(f"Failed to collect data from {self.uri}. {e}")

    def getParser(self, requestsContent: Response = "", seleniumContent: str = "") -> BeautifulSoup:
        try:
            if seleniumContent != "":
                return BeautifulSoup(seleniumContent, features="html.parser")
            else:
                return BeautifulSoup(requestsContent.content, features="html.parser")
        except Exception as e:
            self.logger.critical(f"Failed to parse data returned from requests. {e}")

    def getHeaders(self) -> dict:
        return {"User-Agent": "NewsBot - Automated News Delivery"}
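# A sketch of how a source builds on BSources: checkEnv() populates links and
# hooks from the Sources and DiscordWebHooks tables. "Example" is a
# hypothetical site name; the pattern mirrors the real readers below.
class ExampleReader(BSources):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri: str = "https://example.net/"
        self.siteName: str = "Example"
        self.links: List[Sources] = list()
        self.hooks: List[DiscordWebHooks] = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.checkEnv(self.siteName)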
class FFXIVReader(ISources, BSources):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri: str = "https://na.finalfantasyxiv.com/lodestone/news/"
        self.baseUri: str = "https://na.finalfantasyxiv.com"
        self.siteName: str = "Final Fantasy XIV"
        self.authorName: str = "Final Fantasy XIV Official Site"
        self.links = list()
        self.hooks = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.checkEnv(self.siteName)

    def getArticles(self) -> List[Articles]:
        allArticles: List[Articles] = list()
        for site in self.links:
            self.logger.debug(f"{site.name} - Checking for updates.")
            self.uri = site.url
            page = self.getParser(requestsContent=self.getContent())

            if "Topics" in site.name:
                try:
                    for news in page.find_all(
                            "li", attrs={"class": "news__list--topics ic__topics--list"}):
                        a = Articles(
                            siteName=self.siteName,
                            tags="ffxiv, topics, news",
                            authorName=self.authorName,
                        )
                        header = news.contents[0].contents
                        body = news.contents[1].contents
                        a.title = header[0].text
                        a.url = f"{self.baseUri}{header[0].contents[0].attrs['href']}"
                        a.thumbnail = body[0].contents[0].attrs["src"]
                        a.description = body[0].contents[0].next_element.text
                        allArticles.append(a)
                except Exception as e:
                    self.logger.error(f"Failed to collect Topics from FFXIV. {e}")

            if "Notices" in site.name:
                try:
                    for news in page.find_all(
                            "a", attrs={"class": "news__list--link ic__info--list"}):
                        a = Articles(
                            siteName=self.siteName,
                            tags="ffxiv, notices, news",
                            authorName=self.authorName,
                        )
                        a.title = news.text
                        a.url = f"{self.baseUri}{news.attrs['href']}"
                        # Pull the article body from the details page.
                        self.uri = a.url
                        details = self.getParser(requestsContent=self.getContent())
                        for d in details.find_all(
                                "div", attrs={"class": "news__detail__wrapper"}):
                            a.description = d.text
                        allArticles.append(a)
                except Exception as e:
                    self.logger.error(f"Failed to collect Notices from FFXIV. {e}")

            if "Maintenance" in site.name:
                try:
                    for news in page.find_all(
                            "a", attrs={"class": "news__list--link ic__maintenance--list"}):
                        a = Articles(
                            siteName=self.siteName,
                            tags="ffxiv, maintenance, news",
                            authorName=self.authorName,
                        )
                        a.title = news.text
                        a.url = f"{self.baseUri}{news.attrs['href']}"
                        self.uri = a.url
                        details = self.getParser(requestsContent=self.getContent())
                        for d in details.find_all(
                                "div", attrs={"class": "news__detail__wrapper"}):
                            a.description = d.text
                        allArticles.append(a)
                except Exception as e:
                    self.logger.error(
                        f"Failed to collect Maintenance records from FFXIV. {e}")

            if "Updates" in site.name:
                try:
                    for news in page.find_all(
                            "a", attrs={"class": "news__list--link ic__update--list"}):
                        a = Articles(
                            siteName=self.siteName,
                            tags="ffxiv, updates, news",
                            authorName=self.authorName,
                        )
                        a.title = news.text
                        a.url = f"{self.baseUri}{news.attrs['href']}"
                        self.uri = a.url
                        details = self.getParser(requestsContent=self.getContent())
                        for d in details.find_all(
                                "div", attrs={"class": "news__detail__wrapper"}):
                            a.description = d.text
                        allArticles.append(a)
                except Exception as e:
                    self.logger.error(
                        f"Failed to collect Updates records from FFXIV. {e}")

            if "Status" in site.name:
                try:
                    for news in page.find_all(
                            "a", attrs={"class": "news__list--link ic__obstacle--list"}):
                        a = Articles(
                            siteName=self.siteName,
                            tags="ffxiv, news, status",
                            authorName=self.authorName,
                        )
                        a.title = news.text
                        a.url = f"{self.baseUri}{news.attrs['href']}"
                        self.uri = a.url
                        details = self.getParser(requestsContent=self.getContent())
                        for d in details.find_all(
                                "div", attrs={"class": "news__detail__wrapper"}):
                            a.description = d.text
                        allArticles.append(a)
                except Exception as e:
                    self.logger.error(
                        f"Failed to collect Status records from FFXIV. {e}")

        return allArticles
class Discord(IOutputs):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.table = DiscordQueue()
        self.tempMessage: DiscordWebhook = DiscordWebhook("placeholder")

    def enableThread(self) -> None:
        while True:
            # Tell the database to give us the queue on the table.
            try:
                queue = self.table.getQueue()
                for i in queue:
                    resp = self.sendMessage(i)

                    # Only remove the object from the queue if we sent it out correctly.
                    safeToRemove: bool = True
                    for r in resp:
                        if r.status_code != 204:
                            safeToRemove = False
                    if safeToRemove == True:
                        i.remove()

                    sleep(env.discord_delay_seconds)
            except Exception as e:
                self.logger.error(f"Failed to post a message. Error: {e}")
            sleep(env.discord_delay_seconds)

    def buildMessage(self, article: DiscordQueue) -> None:
        # Reset the stored message.
        self.tempMessage = DiscordWebhook("placeholder")

        # Extract the webhooks that relate to this site.
        webhooks: List[str] = self.getHooks(article.siteName)

        # Make a new webhook with the hooks that relate to this site.
        hook: DiscordWebhook = DiscordWebhook(webhooks)

        title = article.title
        if len(title) >= 128:
            title = f"{title[0:128]}..."

        # Make a new Embed object.
        embed: DiscordEmbed = DiscordEmbed(title=title)
        try:
            authorIcon = self.getAuthorIcon(article.authorImage, article.siteName)
            embed.set_author(name=article.authorName, url=None, icon_url=authorIcon)
        except:
            pass

        # A Discord embed description can only contain 2048 characters.
        ch = ConvertHtml()
        if article.description != "":
            description: str = str(article.description)
            description = self.convertFromHtml(description)
            description = ch.replaceImages(description, '')
            descriptionCount = len(description)
            if descriptionCount >= 2048:
                description = description[0:2040]
                description = f"{description}..."
            embed.description = description

        # Figure out if we have video based content.
        if article.video != "":
            embed.description = "View the video online!"
            embed.set_video(url=article.video,
                            height=article.videoHeight,
                            width=article.videoWidth)

        try:
            if article.thumbnail != "":
                if " " in article.thumbnail:
                    s = article.thumbnail.split(" ")
                    embed.set_image(url=s[0])
                else:
                    embed.set_image(url=article.thumbnail)
        except Exception as e:
            self.logger.warning(
                f"Failed to attach a thumbnail. \r\n {e}\r\n thumbnails: {article.thumbnail}"
            )

        # Add the link to the embed.
        embed.add_embed_field(name="Link:", value=article.link)

        # Build our footer message.
        footer = self.buildFooter(article.siteName)
        footerIcon = self.getFooterIcon(article.siteName)
        embed.set_footer(icon_url=footerIcon, text=footer)

        embed.set_color(color=self.getEmbedColor(article.siteName))

        hook.add_embed(embed)
        self.tempMessage = hook

    def sendMessage(self, article: DiscordQueue) -> List[Response]:
        if article.title != "":
            self.logger.info(f"Discord - Sending article '{article.title}'")
        else:
            self.logger.info(f"Discord - Sending article '{article.description}'")
        self.buildMessage(article)
        try:
            res = self.tempMessage.execute()
        except Exception as e:
            self.logger.critical(
                f"Failed to send to Discord. Check to ensure the webhook is correct. Error: {e}"
            )
            return list()

        # Check to see if we got back a single response or multiple.
        hooks: int = len(self.getHooks(article.siteName))
        if hooks == 1:
            responses = list()
            responses.append(res)
        else:
            responses = res
        return responses

    def getHooks(self, newsSource: str) -> List[str]:
        hooks = list()
        try:
            dwh = DiscordWebHooks(name=newsSource).findAllByName()
            for i in dwh:
                hooks.append(i.key)
            return hooks
        except Exception as e:
            self.logger.critical(
                f"Unable to find DiscordWebhook for {newsSource}. {e}")

    def convertFromHtml(self, msg: str) -> str:
        msg = msg.replace("<h2>", "**")
        msg = msg.replace("</h2>", "**")
        msg = msg.replace("<h3>", "**")
        msg = msg.replace("</h3>", "**\r\n")
        msg = msg.replace("<strong>", "**")
        msg = msg.replace("</strong>", "**\r\n")
        msg = msg.replace("<ul>", "\r\n")
        msg = msg.replace("</ul>", "")
        msg = msg.replace("</li>", "\r\n")
        msg = msg.replace("<li>", "> ")
        msg = msg.replace("“", '"')
        msg = msg.replace("”", '"')
        msg = msg.replace("…", "...")
        msg = msg.replace("<b>", "**")
        msg = msg.replace("</b>", "**")
        msg = msg.replace("<br>", "\r\n")
        msg = msg.replace("<br/>", "\r\n")
        msg = msg.replace("\xe2\x96\xa0", "*")
        msg = msg.replace("\xa0", "\r\n")
        msg = msg.replace("<p>", "")
        msg = msg.replace("</p>", "\r\n")
        msg = self.replaceLinks(msg)
        return msg

    def replaceLinks(self, msg: str) -> str:
        """
        Find the HTML links and replace them with something Discord supports.
        """
        msg = msg.replace("'", '"')
        links = re.findall("<a(.*?)a>", msg)
        for l in links:
            hrefs = re.findall('href="(.*?)"', l)
            texts = re.findall(">(.*?)</", l)
            if len(hrefs) >= 1 and len(texts) >= 1:
                discordLink = f"[{texts[0]}]({hrefs[0]})"
                msg = msg.replace(f"<a{l}a>", discordLink)
        return msg

    def replaceImages(self, msg: str) -> str:
        # Removing the images for now.
        imgs = re.findall("<img (.*?)>", msg)
        for i in imgs:
            replace = f"<img {i}>"
            msg = msg.replace(replace, "")
        return msg

    def getAuthorIcon(self, authorIcon: str, siteName: str) -> str:
        if authorIcon != "":
            return authorIcon
        else:
            if (siteName == "Final Fantasy XIV"
                    or siteName == "Phantasy Star Online 2"
                    or siteName == "Pokemon Go Hub"):
                res = Icons(site=f"Default {siteName}").findAllByName()
                return res[0].filename
            else:
                s: List[str] = siteName.split(" ")
                if s[0] == "RSS":
                    res = Icons(site=siteName).findAllByName()
                else:
                    res = Icons(site=f"Default {s[0]}").findAllByName()
                return res[0].filename

    def buildFooter(self, siteName: str) -> str:
        footer = ""
        end: str = "Brought to you by NewsBot"
        if "reddit" in siteName.lower():
            footer = end
        elif "Youtube" in siteName:
            s = siteName.split(" ")
            footer = f"{s[1]} - {end}"
        elif "Instagram" in siteName or "Twitter" in siteName:
            s = siteName.split(" ")
            if s[1] == "tag":
                footer = f"#{s[2]} - {end}"
            elif s[1] == "user":
                footer = f"{s[2]} - {end}"
        elif "RSS" in siteName:
            s = siteName.split(" ")
            footer = f"{s[1]} - {end}"
        else:
            footer = end
        return footer

    def getFooterIcon(self, siteName: str) -> str:
        if (siteName == "Phantasy Star Online 2"
                or siteName == "Pokemon Go Hub"
                or siteName == "Final Fantasy XIV"):
            res = Icons(site=f"Default {siteName}").findAllByName()
            return res[0].filename
        else:
            s: List[str] = siteName.split(" ")
            values = (f"Default {s[1]}", f"Default {s[0]}", siteName)
            res = list()
            for v in values:
                r = Icons(site=v).findAllByName()
                if len(r) == 1:
                    res = r
            try:
                if res[0].filename != "":
                    return res[0].filename
                else:
                    return ""
            except:
                return ""

    def getEmbedColor(self, siteName: str) -> int:
        # Decimal values can be collected from https://www.spycolor.com
        if "Reddit" in siteName:
            return 16395272
        elif "Youtube" in siteName:
            return 16449542
        elif "Instagram" in siteName:
            return 13303930
        elif "Twitter" in siteName:
            return 1937134
        elif "Final Fantasy XIV" in siteName:
            return 11809847
        elif "Pokemon Go Hub" in siteName:
            return 2081673
        elif "Phantasy Star Online 2" in siteName:
            return 5557497
        elif "Twitch" in siteName:
            return 9718783
        else:
            return 0
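# A quick demonstration of the HTML-to-Discord-markdown conversion above
# (a sketch; it assumes Discord() can be constructed without side effects,
# and the URL is a placeholder):
d = Discord()
html = "<p><strong>Patch Notes</strong><a href='https://example.net/post'>Read more</a></p>"
print(d.convertFromHtml(html))
# -> **Patch Notes**\r\n[Read more](https://example.net/post)\r\n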
class TwitchReader(ISources, BSources):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri = "https://twitch.tv/"
        self.siteName: str = "Twitch"
        self.links: List[Sources] = list()
        self.hooks: List[DiscordWebHooks] = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.checkEnv(self.siteName)

    def getArticles(self) -> List[Articles]:
        self.logger.debug("Checking Twitch for updates.")
        api = TwitchAPI()
        auth = api.auth()

        allPosts = list()
        for i in self.links:
            s = i.name.split(" ")
            userName = s[2]
            self.logger.debug(f"Checking Twitch user {userName} for updates.")

            user_id = Cache(key=f"twitch {userName} user_id").find()
            if user_id == "":
                # Take the values and add them to the cache so we do not need
                # to call the API for this user each pass.
                user: TwitchUser = api.getUser(auth, userName)
                user_id = Cache(key=f"twitch {userName} user_id",
                                value=user.id).add()
                display_name = Cache(key=f"twitch {userName} display_name",
                                     value=user.display_name).add()
                profile_image_url = Cache(
                    key=f"twitch {userName} profile_image_url",
                    value=user.profile_image_url,
                ).add()
            else:
                # We have cached this information already.
                display_name = Cache(key=f"twitch {userName} display_name").find()
                profile_image_url = Cache(
                    key=f"twitch {userName} profile_image_url").find()

            enableClips = Cache(key="twitch clips enabled").find()
            if enableClips.lower() == "true":
                clips: List[TwitchClip] = api.getClips(auth, user_id=user_id)
                for v in clips:
                    try:
                        a = Articles(
                            siteName=f"Twitch user {display_name}",
                            authorName=display_name,
                            authorImage=profile_image_url,
                            tags=f"Twitch, clip, {display_name}",
                            title=v.title,
                            pubDate=v.created_at,
                            url=v.url,
                            thumbnail=v.thumbnail_url,
                            description="A new clip has been posted! You can watch it with the link below.",
                        )
                        allPosts.append(a)
                    except Exception as e:
                        self.logger.error(e)

            enableVoD = Cache(key="twitch vod enable").find()
            if enableVoD.lower() == "true":
                videos: List[TwitchVideo] = api.getVideos(auth, user_id=user_id)
                for v in videos:
                    try:
                        a = Articles(
                            siteName=f"Twitch user {display_name}",
                            authorName=display_name,
                            authorImage=profile_image_url,
                            tags=f"Twitch, vod, {display_name}",
                            title=v.title,
                            description="A new video has been posted! You can watch it with the link below.",
                            pubDate=v.published_at,
                            url=v.url,
                        )
                        # The API returns sizing placeholders in the thumbnail URL.
                        thumb: str = v.thumbnail_url
                        thumb = thumb.replace("%{width}", "600")
                        thumb = thumb.replace("%{height}", "400")
                        a.thumbnail = thumb
                        allPosts.append(a)
                    except Exception as e:
                        self.logger.error(e)

        return allPosts
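# The Cache table acts as a simple key/value store; a sketch of the
# read-through pattern TwitchReader relies on. The semantics (find() returns
# "" on a miss, add() returns the stored value) are inferred from the usage above.
value = Cache(key="twitch example user_id").find()
if value == "":
    value = Cache(key="twitch example user_id", value="12345").add()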
class RedditReader(ISources, BSources, BChrome):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri = "https://reddit.com/r/aww/top.json"
        self.siteName = "Reddit"
        self.links: List[Sources] = list()
        self.hooks: List[DiscordWebHooks] = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.checkEnv(self.siteName)

    def getArticles(self) -> List[Articles]:
        # TODO Flag NSFW
        # allowNSFW = True

        self.driver = self.driverStart()

        allArticles: List[Articles] = list()
        for source in self.links:
            authorImage = ""
            authorName = ""
            subreddit = source.name.replace("Reddit ", "")
            self.logger.debug(f"Collecting posts for '/r/{subreddit}'...")

            # Add the info we get via Selenium to the cache to avoid pulling it each time.
            authorImage = Cache(key=f"reddit {subreddit} authorImage").find()
            authorName = Cache(key=f"reddit {subreddit} authorName").find()
            if authorImage == "":
                # Collect values that we do not get from the JSON feed.
                self.uri = f"https://reddit.com/r/{subreddit}"
                self.driverGoTo(self.uri)
                soup = self.getParser(seleniumContent=self.driverGetContent())

                subImages = soup.find_all(
                    name="img", attrs={"class": "Mh_Wl6YioFfBc9O1SQ4Jp"})
                if len(subImages) != 0:
                    authorImage = subImages[0].attrs["src"]

                if authorImage == "":
                    # Failed to find the custom icon; the sub might not have custom CSS.
                    # I am not sure how to deal with SVG images at this time, so
                    # throw in the default Reddit icon instead.
                    subImages = soup.find_all(
                        name="svg", attrs={"class": "ixfotyd9YXZz0LNAtJ25N"})
                    if len(subImages) == 1:
                        authorImage = "https://www.redditstatic.com/desktop2x/img/favicon/android-icon-192x192.png"

                subName = soup.find_all(
                    name="h1", attrs={"class": "_2yYPPW47QxD4lFQTKpfpLQ"})
                authorName = f"/r/{subreddit} - {subName[0].text}"
                Cache(key=f"reddit {subreddit} authorImage", value=authorImage).add()
                Cache(key=f"reddit {subreddit} authorName", value=authorName).add()

            # Now check the JSON listing for new posts.
            posts = self.getPosts(subreddit)
            for p in posts:
                if (Articles(url=f"https://reddit.com{p['data']['permalink']}"
                             ).exists() == False):
                    allArticles.append(
                        self.getPostDetails(p["data"], subreddit, authorName,
                                            authorImage))
            sleep(5.0)

        self.driverClose()
        return allArticles

    def getVideoThumbnail(self, preview) -> str:
        try:
            return preview["images"][0]["source"]["url"]
        except:
            return ""

    def getPosts(self, subreddit: str) -> List[dict]:
        rootUri = f"https://reddit.com/r/{subreddit}"
        for uri in (f"{rootUri}/top.json", f"{rootUri}.json"):
            try:
                self.uri = uri
                siteContent = self.getContent()
                page = self.getParser(requestsContent=siteContent)
                json = loads(page.text)
                items = json["data"]["children"]
                if len(items) >= 25:
                    return items
            except:
                pass
        return list()

    def getPostDetails(self, obj: dict, subreddit: str, authorName: str,
                       authorImage: str) -> Articles:
        try:
            a = Articles()
            a.url = f"https://reddit.com{obj['permalink']}"
            a.siteName = f"Reddit {subreddit}"
            a.authorImage = authorImage
            a.authorName = authorName
            a.title = f"{obj['title']}"
            a.tags = obj["subreddit"]

            # Figure out what URL we are going to display.
            if obj["is_video"] == True:
                a.video = obj["media"]["reddit_video"]["fallback_url"]
                a.videoHeight = obj["media"]["reddit_video"]["height"]
                a.videoWidth = obj["media"]["reddit_video"]["width"]
                a.thumbnail = self.getVideoThumbnail(obj["preview"])
            elif obj["media_only"] == True:
                self.logger.debug(f"Found a media_only post; needs review. {a.url}")
            elif "gallery" in obj["url"]:
                self.uri = obj["url"]
                source = self.getContent()
                soup = self.getParser(requestsContent=source)
                try:
                    images = soup.find_all(
                        name="img", attrs={"class": "_1dwExqTGJH2jnA-MYGkEL-"})
                    pictures: str = ""
                    for i in images:
                        pictures += f"{i.attrs['src']} "
                    a.thumbnail = pictures
                except Exception as e:
                    self.logger.error(
                        f"Failed to find the images on a Reddit gallery. CSS might have changed. {e}"
                    )
            else:
                a.thumbnail = obj["url"]
            return a
        except Exception as e:
            self.logger.error(
                f"Failed to extract Reddit post. Too many connections? {e}")
def __init__(self, url: str = "") -> None: self.url = url self.logger = Logger(__class__) pass
class RequestContent:
    """
    This is a common class that will request site information.
    This class makes use of the Requests and BeautifulSoup libraries.

    Examples:
    RequestContent(url='www').
    RequestContent().setUrl("www").
    """

    def __init__(self, url: str = "") -> None:
        self.url = url
        self.logger = Logger(__class__)

    def setUrl(self, url: str) -> None:
        """
        If you want to parse a URL, set the value here.
        """
        self.url = url

    def setSoup(self, soup: BeautifulSoup) -> None:
        """
        If the source has already been parsed elsewhere, pass the BeautifulSoup object here.
        """
        self.soup = soup

    def __getHeaders__(self) -> dict:
        return {"User-Agent": "NewsBot - Automated News Delivery"}

    def __getSource__(self) -> str:
        try:
            res: Response = get(self.url, headers=self.__getHeaders__())
            if res.ok == True:
                self.__response__: Response = res
                return res.text
            else:
                self.logger.error(
                    f"Attempted to get data from '{self.url}' but did not get any data. StatusCode={res.status_code}"
                )
                return ""
        except Exception as e:
            self.logger.critical(
                f"Failed to get data from '{self.url}'; the request resulted in an error. {e}"
            )
            return ""

    def __getSoup__(self) -> BeautifulSoup:
        try:
            soup = BeautifulSoup(self.__source__, features="html.parser")
            return soup
        except Exception as e:
            self.logger.error(e)
            return BeautifulSoup()

    def getPageDetails(self) -> None:
        """
        This pulls the source code and converts it into a BeautifulSoup object.
        """
        if self.url == "":
            self.logger.error(
                "Was requested to pull data from a site, but no URL was passed."
            )
        else:
            self.__source__ = self.__getSource__()
            try:
                if self.__soup__.text == "":
                    self.__soup__ = self.__getSoup__()
            except:
                self.__soup__ = self.__getSoup__()

    def findSingle(
        self, name: str = "", attrKey: str = "", attrValue: str = ""
    ) -> BeautifulSoup:
        if attrKey != "":
            attrs = {attrKey: attrValue}
            return self.__soup__.find(name=name, attrs=attrs)
        else:
            return self.__soup__.find(name=name)

    def findMany(
        self, name: str = "", attrKey: str = "", attrValue: str = ""
    ) -> List[BeautifulSoup]:
        if attrKey != "":
            return self.__soup__.find_all(name=name, attrs={attrKey: attrValue})
        else:
            return self.__soup__.find_all(name=name)

    def findFeedLink(self) -> dict:
        atom = self.findSingle(
            name="link", attrKey="type", attrValue="application/atom+xml"
        )
        rss = self.findSingle(
            name="link", attrKey="type", attrValue="application/rss+xml"
        )
        json = self.findSingle(
            name="link", attrKey="type", attrValue="application/json"
        )

        if atom != None:
            return self.__buildFeedDict__("atom", atom.attrs["href"])
        elif rss != None:
            return self.__buildFeedDict__("rss", rss.attrs["href"])
        elif json != None:
            return self.__buildFeedDict__("json", json.attrs["href"])
        else:
            return self.__buildFeedDict__("none", None)

    def __buildFeedDict__(self, type: str, content: str) -> dict:
        return {"type": type, "content": content}

    def findSiteIcon(self, siteUrl: str) -> str:
        """
        This will attempt to extract the 'apple-touch-icon' from the header.

        return: str
        """
        # If the site URL ends with a /, remove it.
        if siteUrl.endswith("/") == True:
            siteUrl = siteUrl.strip("/")

        bestSize: int = -1
        icons = self.findMany(name="link", attrKey="rel", attrValue="apple-touch-icon")

        # Look through all the icons given and find the largest one.
        for icon in icons:
            size: int = int(icon.attrs["sizes"].split("x")[0])
            if size > bestSize:
                bestSize = size

        # Take what we found as the largest icon and return it.
        for icon in icons:
            size: int = int(icon.attrs["sizes"].split("x")[0])
            if size == bestSize:
                href = icon.attrs["href"]
                if "http" in href or "https" in href:
                    return href
                else:
                    return f"{siteUrl}{href}"
        return ""

    def findArticleThumbnail(self) -> str:
        """
        This is best used on articles, not on the site's main page. It will
        check the page for defined thumbnails and return the first one it
        finds, if any.

        return: str
        """
        meta = (
            {"name": "meta", "attrKey": "property", "attrValue": "og:image"},
            {"name": "meta", "attrKey": "name", "attrValue": "twitter:image:src"},
        )
        for i in meta:
            try:
                item = self.findSingle(
                    name=i["name"], attrKey=i["attrKey"], attrValue=i["attrValue"]
                )
                if item.attrs["content"] != "":
                    return item.attrs["content"]
            except:
                pass
        return ""

    def findArticleDescription(self) -> str:
        lookups = (
            {"name": "div", "key": "class", "value": "entry-content e-content"},
            {"name": "div", "key": "class", "value": "engadget-post-contents"},
            {"name": "div", "key": "class", "value": "article-content post-page"},
        )
        for l in lookups:
            content = self.findSingle(
                name=l["name"], attrKey=l["key"], attrValue=l["value"]
            )
            if content != None and content.text != "":
                return content.text
        return ""
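# A usage sketch mirroring the docstring above (example.net is a placeholder):
rc = RequestContent(url="https://example.net/")
rc.getPageDetails()
feed = rc.findFeedLink()        # {"type": "rss", "content": "..."} or type "none"
icon = rc.findSiteIcon("https://example.net/")
thumb = rc.findArticleThumbnail()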
class PogohubReader(ISources, BSources):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri = "https://pokemongohub.net/rss"
        self.siteName: str = "Pokemon Go Hub"
        self.authorName: str = "Pokemon Go Hub"
        self.links = list()
        self.hooks = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.checkEnv(self.siteName)

    def getArticles(self) -> List[Articles]:
        allArticles: List[Articles] = list()
        for site in self.links:
            self.logger.debug(f"{site.name} - Checking for updates.")
            self.uri = site.url

            siteContent: Response = self.getContent()
            if siteContent.status_code != 200:
                raise UnableToFindContent(
                    f"Did not get status code 200. Got status code {siteContent.status_code}"
                )

            bs: BeautifulSoup = self.getParser(requestsContent=siteContent)
            try:
                mainLoop = bs.contents[1].contents[1].contents
                for i in mainLoop:
                    if i.name == "item":
                        item: Articles = self.processItem(i)

                        # Check here whether we need to fetch the thumbnail.
                        # If we have seen the link already, move on and save time.
                        seenAlready = item.exists()
                        if seenAlready == False:
                            item.thumbnail = self.getArticleThumbnail(item.url)
                            allArticles.append(item)

                self.logger.debug("Pokemon Go Hub - Finished checking.")
            except Exception as e:
                self.logger.error(
                    f"Failed to parse articles from Pokemon Go Hub. Chances are we have a malformed response. {e}"
                )
        return allArticles

    def processItem(self, item: object) -> Articles:
        a = Articles(
            siteName=self.siteName,
            authorName=self.authorName,
            tags="pokemon go hub, pokemon, go, hub, news",
        )
        for i in item.contents:
            if i.name == "title":
                a.title = i.next
            elif i.name == "link":
                a.url = self.removeHTMLTags(i.next)
            elif i.name == "pubdate":
                a.pubDate = i.next
            elif i.name == "category":
                a.tags = i.next
            elif i.name == "description":
                a.description = self.removeHTMLTags(i.next)
            elif i.name == "content:encoded":
                a.content = i.next
        return a

    def removeHTMLTags(self, text: str) -> str:
        text = text.replace("\n", "")
        text = text.replace("\t", "")
        text = text.replace("<p>", "")
        text = text.replace("</p>", "\r\n")
        text = text.replace("’", "'")
        return text

    def getArticleThumbnail(self, link: str) -> str:
        try:
            self.uri = link
            r = self.getContent()
            bs: BeautifulSoup = BeautifulSoup(r.content, features="html.parser")
            res = bs.find_all("img", class_="entry-thumb")
            return res[0].attrs["src"]
        except Exception as e:
            self.logger.error(
                f"Failed to pull the Pokemon Go Hub thumbnail for {link}. {e}")
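# A small sketch of the RSS <item> shape processItem() walks (made-up feed
# content; note that BeautifulSoup's html.parser lowercases tag names, which
# is why the code above checks for "pubdate"):
rss = """<rss><channel><item>
<title>Example post</title>
<link>https://pokemongohub.net/post/example/</link>
<pubDate>Mon, 01 Jan 2024 00:00:00 +0000</pubDate>
<description>&lt;p&gt;Example body&lt;/p&gt;</description>
</item></channel></rss>"""
item = BeautifulSoup(rss, features="html.parser").find("item")
for child in item.contents:
    if child.name is not None:
        print(child.name, "->", child.next)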