Example #1
class Worker():
    """
    This is a generic worker that will contain the source it will monitor.
    """
    def __init__(self, source: ISources):
        self.logger = Logger(__class__)
        self.source: ISources = source
        self.enabled: bool = False
        pass

    def check(self) -> bool:
        if len(self.source.links) >= 1:
            self.enabled = True
        else:
            self.enabled = False
            self.logger.info(
                f"{self.source.siteName} was not enabled.  Thread will exit.")
        return self.enabled

    def init(self) -> None:
        """
        This is the entry point for the worker.
        Once it's turned on, it will check the Source for new items.
        """
        if self.source.sourceEnabled == True:
            self.logger.debug(f"{self.source.siteName} Worker has started.")

            while True:
                news = self.source.getArticles()

                # Check the DB if it has been posted
                for i in news:
                    exists = i.exists()

                    if exists == False:
                        i.add()

                        if len(self.source.hooks) >= 1:
                            dq = DiscordQueue()
                            dq.convert(i)
                            res = dq.add()

                            self.discordQueueMessage(i, res)

                self.logger.debug(
                    f"{self.source.siteName} Worker is going to sleep.")
                sleep(env.threadSleepTimer)

    def discordQueueMessage(self, i: Articles, added: bool) -> None:
        msg: str = ''
        if i.title != '':
            msg = i.title
        else:
            msg = i.description

        if added == True:
            self.logger.info(f"{msg} was added to the Discord queue.")
        else:
            self.logger.error(
                f"{msg} was not added to add to the Discord queue.")
Example #2
class BChrome():
    """
    This class helps to interact with Chrome/Selenium.
    It was made to be used as a Base class for the sources who need Chrome.
    """
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri: str = ""
        self.driver = self.driverStart()
        pass

    def driverStart(self) -> Chrome:
        options = ChromeOptions()
        options.add_argument("--disable-extensions")
        options.add_argument("--headless")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        try:
            driver = Chrome(options=options)
            return driver
        except Exception as e:
            self.logger.critical(f"Chrome Driver failed to start! Error: {e}")
            raise

    def driverGetContent(self) -> str:
        try:
            return self.driver.page_source
        except Exception as e:
            self.logger.critical(f"Failed to collect data from {self.uri}. {e}")
            return ""


    def driverGoTo(self, uri: str) -> None:
        try:
            self.driver.get(uri)
            self.driver.implicitly_wait(10)
        except Exception as e:
            self.logger.error(f"Driver failed to get {uri}. Error: {e}")

    def driverClose(self) -> None:
        try:
            self.driver.close()
        except Exception as e:
            self.logger.error(f"Driver failed to close. Error: {e}")
Example #3
class FFXIVReader(ISources, BSources):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri: str = "https://na.finalfantasyxiv.com/lodestone/news/"
        self.baseUri: str = "https://na.finalfantasyxiv.com"
        self.siteName: str = "Final Fantasy XIV"
        self.authorName: str = "Final Fantasy XIV Offical Site"
        self.links = list()
        self.hooks = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.checkEnv(self.siteName)
        pass

    def getArticles(self) -> List[Articles]:
        allArticles: List[Articles] = list()
        for site in self.links:
            self.logger.debug(f"{site.name} - Checking for updates.")
            self.uri = site.url

            #siteContent: Response = self.getContent()
            page = self.getParser(requestsContent=self.getContent())

            if "Topics" in site.name:
                try:
                    for news in page.find_all(
                            "li",
                        {"class", "news__list--topics ic__topics--list"}):
                        a = Articles(
                            siteName=self.siteName,
                            tags="ffxiv, topics, news",
                            authorName=self.authorName,
                        )
                        # a.siteName = self.siteName
                        header = news.contents[0].contents
                        body = news.contents[1].contents
                        a.title = header[0].text
                        a.url = f"{self.baseUri}{header[0].contents[0].attrs['href']}"
                        a.thumbnail = body[0].contents[0].attrs["src"]
                        a.description = body[0].contents[0].next_element.text
                        # a.tags = "Topics"
                        allArticles.append(a)
                except Exception as e:
                    self.logger.error(
                        f"Failed to collect Topics from FFXIV. {e}")

            if "Notices" in site.name:
                try:
                    for news in page.find_all(
                            "a", {"class", "news__list--link ic__info--list"}):
                        a = Articles(
                            siteName=self.siteName,
                            tags="ffxiv, notices, news",
                            authorName=self.authorName,
                        )
                        # a.siteName = self.siteName
                        a.title = news.text
                        a.url = f"{self.baseUri}{news.attrs['href']}"
                        # a.tags = "Notices"
                        self.uri = a.url
                        #subPage = self.getContent()
                        details = self.getParser(
                            requestsContent=self.getContent())
                        for d in details.find_all(
                                "div", {"class", "news__detail__wrapper"}):
                            a.description = d.text
                        allArticles.append(a)
                except Exception as e:
                    self.logger.error(
                        f"Failed to collect Notice from FFXIV. {e}")
                    pass

            if "Maintenance" in site.name:
                try:
                    for news in page.find_all(
                            "a",
                        {"class", "news__list--link ic__maintenance--list"}):
                        a = Articles(
                            siteName=self.siteName,
                            tags="ffxiv, maintenance, news",
                            authorName=self.authorName,
                        )
                        # a.siteName = self.siteName
                        a.title = news.text
                        a.url = f"{self.baseUri}{news.attrs['href']}"
                        # a.tags = site["tag"]
                        self.uri = a.url
                        #subPage = self.getContent()
                        details = self.getParser(
                            requestsContent=self.getContent())
                        for d in details.find_all(
                                "div", {"class", "news__detail__wrapper"}):
                            a.description = d.text

                        allArticles.append(a)
                except Exception as e:
                    self.logger.error(
                        f"Failed to collect Maintenance records from FFXIV. {e}"
                    )
                    pass

            if "Updates" in site.name:
                try:
                    for news in page.find_all(
                            "a",
                        {"class", "news__list--link ic__update--list"}):
                        a = Articles(
                            siteName=self.siteName,
                            tags="ffxiv, updates, news",
                            authorName=self.authorName,
                        )
                        a.title = news.text
                        a.url = f"{self.baseUri}{news.attrs['href']}"
                        self.uri = a.url

                        #subPage = self.getContent()
                        details = self.getParser(
                            requestsContent=self.getContent())

                        for d in details.find_all(
                                "div", {"class", "news__detail__wrapper"}):
                            a.description = d.text
                        allArticles.append(a)
                except Exception as e:
                    self.logger.error(
                        f"Failed to collect Updates records from FFXIV. {e}"
                    )
                    pass

            if "Status" in site.name:
                try:
                    for news in page.find_all(
                            "a",
                        {"class", "news__list--link ic__obstacle--list"}):
                        a = Articles(
                            siteName=self.siteName,
                            tags="ffxiv, news, status",
                            authorName=self.authorName,
                        )
                        a.title = news.text
                        a.url = f"{self.baseUri}{news.attrs['href']}"
                        self.uri = a.url

                        #subPage = self.getContent()
                        details = self.getParser(
                            requestsContent=self.getContent())

                        for d in details.find_all(
                                "div", {"class", "news__detail__wrapper"}):
                            a.description = d.text
                        allArticles.append(a)
                except Exception as e:
                    self.logger.error(
                        f"Failed to collect Status records from FFXIV. {e}"
                    )
                    pass

        return allArticles
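
A trimmed, standalone sketch of the Topics scrape above, assuming the Lodestone markup still carries the class names the reader looks for (they can change without notice):

from bs4 import BeautifulSoup
from requests import get

baseUri = "https://na.finalfantasyxiv.com"
res = get(f"{baseUri}/lodestone/news/", headers={"User-Agent": "NewsBot"})
page = BeautifulSoup(res.text, features="html.parser")

for news in page.find_all("li", {"class": "news__list--topics ic__topics--list"}):
    header = news.contents[0].contents
    title = header[0].text
    url = f"{baseUri}{header[0].contents[0].attrs['href']}"
    print(title, url)
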
Example #4
class TwitchReader(ISources, BSources):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri = "https://twitch.tv/"
        self.siteName: str = "Twitch"
        self.links: List[Sources] = list()
        self.hooks: List[DiscordWebHooks] = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.checkEnv(self.siteName)
        pass

    def getArticles(self) -> List[Articles]:
        self.logger.debug("Checking Twitch for updates.")
        api = TwitchAPI()
        auth = api.auth()

        allPosts = list()
        for i in self.links:
            s = i.name.split(" ")
            userName = s[2]
            self.logger.debug(f"Checking Twitch user {userName} for updates.")

            user_id = Cache(key=f"twitch {userName} user_id").find()
            if user_id == "":
                # Take the values and add them to the cache so we don't need to call the API for this again.
                user: TwitchUser = api.getUser(auth, userName)
                user_id = user.id
                display_name = user.display_name
                profile_image_url = user.profile_image_url
                Cache(key=f"twitch {userName} user_id", value=user_id).add()
                Cache(key=f"twitch {userName} display_name",
                      value=display_name).add()
                Cache(key=f"twitch {userName} profile_image_url",
                      value=profile_image_url).add()
            else:
                # We have cached this information already
                display_name = Cache(
                    key=f"twitch {userName} display_name").find()
                profile_image_url = Cache(
                    key=f"twitch {userName} profile_image_url").find()

            enableClips = Cache(key="twitch clips enabled").find()
            if enableClips.lower() == "true":
                clips: List[TwitchClip] = api.getClips(auth, user_id=user_id)
                for v in clips:
                    try:
                        a = Articles(
                            siteName=f"Twitch user {display_name}",
                            authorName=display_name,
                            authorImage=profile_image_url,
                            tags=f"Twitch, clip, {display_name}",
                            title=v.title,
                            pubDate=v.created_at,
                            url=v.url,
                            thumbnail=v.thumbnail_url,
                            description=
                            "A new clip has been posted! You can watch it with the link below.",
                        )
                        allPosts.append(a)
                    except Exception as e:
                        self.logger.error(e)

            enableVoD = Cache(key="twitch vod enable").find()
            if enableVoD.lower() == "true":
                videos: List[TwitchVideo] = api.getVideos(auth,
                                                          user_id=user_id)
                for v in videos:
                    try:
                        a = Articles(
                            siteName=f"Twitch user {display_name}",
                            authorName=display_name,
                            authorImage=profile_image_url,
                            tags=f"Twitch, vod, {display_name}",
                            # description = v.description,
                            title=v.title,
                            description=
                            "A new video has been posted! You can watch it with the link below.",
                            pubDate=v.published_at,
                            url=v.url,
                        )
                        thumb: str = v.thumbnail_url
                        thumb = thumb.replace("%{width}", "600")
                        thumb = thumb.replace("%{height}", "400")
                        a.thumbnail = thumb
                        allPosts.append(a)
                    except Exception as e:
                        self.logger.error(e)

        return allPosts
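
The cache-or-fetch pattern above, isolated into a small sketch; it assumes the project's Cache table (key/value rows with find() and add()) and the TwitchAPI wrapper, and the import paths are guesses:

from newsbot.tables import Cache                    # assumed module path
from newsbot.sources.twitchapi import TwitchAPI     # assumed module path

def cachedDisplayName(api: TwitchAPI, auth, userName: str) -> str:
    key = f"twitch {userName} display_name"
    displayName = Cache(key=key).find()
    if displayName == "":
        # Not cached yet: hit the Twitch API once and store the result.
        user = api.getUser(auth, userName)
        displayName = user.display_name
        Cache(key=key, value=displayName).add()
    return displayName
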
Example #5
class RedditReader(ISources, BSources, BChrome):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri = "https://reddit.com/r/aww/top.json"
        self.siteName = "Reddit"
        self.links: List[Sources] = list()
        self.hooks: List[DiscordWebHooks] = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False

        self.checkEnv(self.siteName)

    def getArticles(self) -> List[Articles]:
        # TODO Flag NSFW
        #allowNSFW = True

        self.driver = self.driverStart()

        # rss = RSSRoot()
        allArticles: List[Articles] = list()
        for source in self.links:
            authorImage = ""
            authorName = ""
            subreddit = source.name.replace("Reddit ", "")

            self.logger.debug(f"Collecting posts for '/r/{subreddit}'...")

            # Add the info we get via Selenium to the Cache to avoid pulling it each time.
            authorImage = Cache(key=f"reddit {subreddit} authorImage").find()
            authorName = Cache(key=f"reddit {subreddit} authorName").find()
            if authorImage == "":
                # Collect values that we do not get from the RSS
                self.uri = f"https://reddit.com/r/{subreddit}"
                self.driverGoTo(self.uri)
                #source = self.driverGetContent()
                soup = self.getParser(seleniumContent=self.driverGetContent())

                subImages = soup.find_all(
                    name="img", attrs={"class": "Mh_Wl6YioFfBc9O1SQ4Jp"})
                if len(subImages) != 0:
                    # Found a custom icon for this subreddit.
                    authorImage = subImages[0].attrs["src"]

                if authorImage == "":
                    # I am not sure how to deal with svg images at this time.  Going to throw in the default reddit icon.
                    subImages = soup.find_all(
                        name="svg", attrs={"class": "ixfotyd9YXZz0LNAtJ25N"})
                    if len(subImages) == 1:
                        authorImage = "https://www.redditstatic.com/desktop2x/img/favicon/android-icon-192x192.png"

                subName = soup.find_all(
                    name="h1", attrs={"class": "_2yYPPW47QxD4lFQTKpfpLQ"})
                authorName = f"/r/{subreddit} - {subName[0].text}"
                Cache(key=f"reddit {subreddit} authorImage",
                      value=authorImage).add()
                Cache(key=f"reddit {subreddit} authorName",
                      value=authorName).add()

            # Now check the RSS
            posts = self.getPosts(subreddit)
            for p in posts:
                if (Articles(url=f"https://reddit.com{p['data']['permalink']}"
                             ).exists() == False):
                    allArticles.append(
                        self.getPostDetails(p["data"], subreddit, authorName,
                                            authorImage))

            sleep(5.0)

        self.driverClose()
        return allArticles

    def getVideoThumbnail(self, preview) -> str:
        try:
            return preview["images"][0]["source"]["url"]
        except:
            return ""

    def getPosts(self, subreddit: str) -> List[dict]:
        rootUri = f"https://reddit.com/r/{subreddit}"
        posts: List[dict] = list()
        for i in (f"{rootUri}/top.json", f"{rootUri}.json"):
            try:
                self.uri = i
                siteContent = self.getContent()
                page = self.getParser(requestsContent=siteContent)
                json = loads(page.text)
                posts = json["data"]["children"]
                if len(posts) >= 25:
                    return posts
            except:
                pass
        return posts

    def getPostDetails(self, obj: dict, subreddit: str, authorName: str,
                       authorImage: str) -> Articles:
        try:

            a = Articles()
            a.url = f"https://reddit.com{obj['permalink']}"
            a.siteName = f"Reddit {subreddit}"
            a.authorImage = authorImage
            a.authorName = authorName
            a.title = f"{obj['title']}"
            a.tags = obj["subreddit"]

            # figure out what url we are going to display
            if obj["is_video"] == True:
                a.video = obj["media"]["reddit_video"]["fallback_url"]
                a.videoHeight = obj["media"]["reddit_video"]["height"]
                a.videoWidth = obj["media"]["reddit_video"]["width"]
                a.thumbnail = self.getVideoThumbnail(obj["preview"])

            elif obj["media_only"] == True:
                print("review dis")
            elif "gallery" in obj["url"]:
                self.uri = obj["url"]
                source = self.getContent()
                soup = self.getParser(requestsContent=source)
                try:
                    images = soup.find_all(
                        name="img", attrs={"class": "_1dwExqTGJH2jnA-MYGkEL-"})
                    pictures: str = ""
                    for i in images:
                        pictures += f"{i.attrs['src']} "
                    a.thumbnail = pictures
                except Exception as e:
                    self.logger.error(
                        f"Failed to find the images on a reddit gallery.  CSS might have changed."
                    )
            else:
                a.thumbnail = obj["url"]

            return a
        except Exception as e:
            self.logger.error(
                f"Failed to extract Reddit post.  Too many connections? {e}")
Example #6
class RequestContent:
    """
    This is a common class that will request site information.
    This class makes use of the Requests and BeautifulSoup libraries.

    Examples:
    RequestContent(url='www')
    RequestContent().setUrl('www')
    """

    def __init__(self, url: str = "") -> None:
        self.url = url
        self.logger = Logger(__class__)
        pass

    def setUrl(self, url: str) -> None:
        """
        If you want to parse a URL, set the value here.
        """
        self.url = url

    def setSoup(self, soup: BeautifulSoup) -> None:
        """
        If the source has already been parsed elsewhere, pass the BeautifulSoup object here.
        """
        self.__soup__ = soup

    def __getHeaders__(self) -> dict:
        return {"User-Agent": "NewsBot - Automated News Delivery"}

    def __getSource__(self) -> str:
        try:
            res: Response = get(self.url, headers=self.__getHeaders__())
            if res.ok == True:
                self.__response__: Response = res
                return res.text
            else:
                self.logger.error(
                    f"Attempted to get data from '{self.url}' but did not get any data.  StatusCode={res.status_code}"
                )
                return ""
        except Exception as e:
            self.logger.critical(
                f"Failed to get data from '{self.url}'. Error: {e}"
            )
            return ""

    def __getSoup__(self) -> BeautifulSoup:
        try:
            soup = BeautifulSoup(self.__source__, features="html.parser")
            return soup
        except Exception as e:
            self.logger.error(e)
            return BeautifulSoup()

    def getPageDetails(self) -> None:
        """
        This pulls the source code and converts it into a BeautifulSoup object.
        """
        if self.url == "":
            self.logger.error(
                "Was requested to pull data from a site, but no URL was passed."
            )
        else:
            self.__source__ = self.__getSource__()

        try:
            if self.__soup__.text == "":
                self.__soup__ = self.__getSoup__()
            else:
                pass
        except:
            self.__soup__ = self.__getSoup__()

        pass

    def findSingle(
        self, name: str = "", attrKey: str = "", attrValue: str = ""
    ) -> BeautifulSoup:
        if attrKey != "":
            attrs = {attrKey: attrValue}
            res = self.__soup__.find(name=name, attrs=attrs)
            return res
        else:
            return self.__soup__.find(name=name)

    def findMany(
        self, name: str = "", attrKey: str = "", attrValue: str = ""
    ) -> List[BeautifulSoup]:
        if attrKey != "":
            return self.__soup__.find_all(name=name, attrs={attrKey: attrValue})
        else:
            return self.__soup__.find_all(name=name)

    def findFeedLink(self) -> dict:
        atom = self.findSingle(
            name="link", attrKey="type", attrValue="application/atom+xml"
        )
        rss = self.findSingle(
            name="link", attrKey="type", attrValue="application/rss+xml"
        )
        json = self.findSingle(
            name="link", attrKey="type", attrValue="application/json"
        )

        if atom != None:
            return self.__buildFeedDict__("atom", atom.attrs["href"])
        elif rss != None:
            return self.__buildFeedDict__("rss", rss.attrs["href"])
        elif json != None:
            return self.__buildFeedDict__("json", json.attrs["href"])
        else:
            return self.__buildFeedDict__("none", None)

    def __buildFeedDict__(self, type: str, content: str) -> dict:
        return {"type": type, "content": content}

    def findSiteIcon(self, siteUrl: str) -> str:
        """
        This will go and attempt to extract the 'apple-touch-icon' from the header.

        return: str
        """
        # if a site url contains the / lets remove it
        if siteUrl.endswith("/") == True:
            siteUrl = siteUrl.strip("/")

        bestSize: int = -1
        icons = self.findMany(name="link", attrKey="rel", attrValue="apple-touch-icon")
        # look though all the icons given, find the largest one.
        for icon in icons:
            size: int = int(icon.attrs["sizes"].split("x")[0])
            if size > bestSize:
                bestSize = size

        # take what we found as the largest icon and store it.
        for icon in icons:
            size: int = int(icon.attrs["sizes"].split("x")[0])
            if size == bestSize:
                href = icon.attrs["href"]
                if "http" in href or "https" in href:
                    return href
                else:
                    return f"{siteUrl}{href}"
        return ""

    def findArticleThumbnail(self) -> str:
        """
        This is best used on articles, not on the main site page.
        It will go and check the page for defined thumbnails and return the first one it finds, if any.

        return: str
        """
        meta = (
            {"name": "meta", "attrKey": "property", "attrValue": "og:image"},
            {"name": "meta", "attrKey": "name", "attrValue": "twitter:image:src"},
        )

        for i in meta:
            try:
                item = self.findSingle(
                    name=i["name"], attrKey=i["attrKey"], attrValue=i["attrValue"]
                )
                if item.attrs["content"] != "":
                    thumb = item.attrs["content"]
                    return thumb
            except:
                pass
        return ""

    def findArticleDescription(self) -> str:
        lookups = (
            {"name": "div", "key": "class", "value": "entry-content e-content"},
            {"name": "div", "key": "class", "value": "engadget-post-contents"},
            {"name": "div", "key": "class", "value": "article-content post-page"},
        )

        for l in lookups:
            content = self.findSingle(
                name=l["name"], attrKey=l["key"], attrValue=l["value"]
            )
            if content is not None and content.text != "":
                return content.text
        return ""
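
A usage sketch for RequestContent, assuming it is importable from the project (the import path is a guess):

from newsbot.requestcontent import RequestContent  # assumed module path

rc = RequestContent(url="https://pokemongohub.net")
rc.getPageDetails()

feed = rc.findFeedLink()   # {"type": "rss" | "atom" | "json" | "none", "content": ...}
icon = rc.findSiteIcon("https://pokemongohub.net")
thumb = rc.findArticleThumbnail()  # best used on an article page, not the root
print(feed, icon, thumb)
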
Example #7
class PogohubReader(ISources, BSources):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri = "https://pokemongohub.net/rss"
        self.siteName: str = "Pokemon Go Hub"
        self.authorName: str = "Pokemon Go Hub"
        self.links = list()
        self.hooks = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.checkEnv(self.siteName)
        pass

    def getArticles(self) -> List[Articles]:
        allArticles: List[Articles] = list()
        for site in self.links:
            self.logger.debug(f"{site.name} - Checking for updates.")
            self.uri = site.url

            siteContent: Response = self.getContent()
            if siteContent.status_code != 200:
                raise UnableToFindContent(
                    f"Did not get status code 200.  Got status code {siteContent.status_code}"
                )

            bs: BeautifulSoup = self.getParser(requestsContent=siteContent)

            try:
                mainLoop = bs.contents[1].contents[1].contents

                for i in mainLoop:
                    if i.name == "item":
                        item: Articles = self.processItem(i)

                        # we are doing the check here to see if we need to fetch the thumbnail.
                        # if we have seen the link already, move on and save on time.
                        seenAlready = item.exists()
                        if seenAlready == False:
                            # get thumbnail
                            item.thumbnail = self.getArticleThumbnail(item.url)
                            allArticles.append(item)

                self.logger.debug(f"Pokemon Go Hub - Finished checking.")
            except Exception as e:
                self.logger.error(
                    f"Failed to parse articles from Pokemon Go Hub.  Chances are we have a malformed response. {e}"
                )

        return allArticles

    def processItem(self, item: object) -> Articles:
        a = Articles(
            siteName=self.siteName,
            authorName=self.authorName,
            tags="pokemon go hub, pokemon, go, hub, news",
        )

        for i in item.contents:
            if i.name == "title":
                a.title = i.next
            elif i.name == "link":
                a.url = self.removeHTMLTags(i.next)
            elif i.name == "pubdate":
                a.pubDate = i.next
            elif i.name == "category":
                a.tags = i.next
            elif i.name == "description":
                a.description = self.removeHTMLTags(i.next)
            elif i.name == "content:encoded":
                a.content = i.next
        return a

    def removeHTMLTags(self, text: str) -> str:
        text = text.replace("\n", "")
        text = text.replace("\t", "")
        text = text.replace("<p>", "")
        text = text.replace("</p>", "\r\n")
        text = text.replace("&#8217;", "'")
        return text

    def getArticleThumbnail(self, link: str) -> str:
        try:
            self.uri = link
            r = self.getContent()
            bs: BeautifulSoup = BeautifulSoup(r.content,
                                              features="html.parser")
            res = bs.find_all("img", class_="entry-thumb")
            return res[0].attrs["src"]
        except Exception as e:
            self.logger.error(
                f"Failed to pull Pokemon Go Hub thumbnail for {link}. {e}")
Example #8
class Discord(IOutputs):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.table = DiscordQueue()
        self.tempMessage: DiscordWebhook = DiscordWebhook("placeholder")
        pass

    def enableThread(self) -> None:
        while True:
            # Tell the database to give us the queue on the table.
            try:
                queue = self.table.getQueue()

                for i in queue:

                    resp = self.sendMessage(i)

                    # Only remove the object from the queue if we sent it out correctly.
                    safeToRemove: bool = True
                    for r in resp:
                        if r.status_code != 204:
                            safeToRemove = False

                    if safeToRemove == True:
                        i.remove()

                    sleep(env.discord_delay_seconds)
            except Exception as e:
                self.logger.error(
                    f"Failed to post a message to Discord. Error: {e}")

            sleep(env.discord_delay_seconds)

    def buildMessage(self, article: DiscordQueue) -> None:
        # reset the stored message
        self.tempMessage = DiscordWebhook("placeholder")

        # Extract the webhooks that relate to this site
        webhooks: List[str] = self.getHooks(article.siteName)

        # Make a new webhook with the hooks that relate to this site
        hook: DiscordWebhook = DiscordWebhook(webhooks)
        # hook.content = article.link

        title = article.title
        if len(title) >= 128:
            title = f"{title[0:128]}..."

        # Make a new Embed object
        embed: DiscordEmbed = DiscordEmbed(title=title)  # , url=article.link)

        try:
            authorIcon = self.getAuthorIcon(article.authorImage,
                                            article.siteName)
            embed.set_author(name=article.authorName,
                             url=None,
                             icon_url=authorIcon)
        except:
            pass

        # Discord Embed Description can only contain 2048 characters
        ch = ConvertHtml()
        if article.description != "":
            description: str = str(article.description)
            description = self.convertFromHtml(description)
            description = ch.replaceImages(description, '')
            #description = self.replaceImages(description)
            descriptionCount = len(description)
            if descriptionCount >= 2048:
                description = description[0:2040]
                description = f"{description}..."
            embed.description = description

        # Figure out if we have video based content
        if article.video != "":
            embed.description = "View the video online!"
            embed.set_video(url=article.video,
                            height=article.videoHeight,
                            width=article.videoWidth)

        try:
            if article.thumbnail != "":
                if " " in article.thumbnail:
                    s = article.thumbnail.split(" ")
                    embed.set_image(url=s[0])
                else:
                    embed.set_image(url=article.thumbnail)
        except Exception as e:
            self.logger.warning(
                f"Failed to attach a thumbnail. \r\n {e}\r\n thumbnails: {article.thumbnail}"
            )

        # add the link to the embed
        embed.add_embed_field(name="Link:", value=article.link)

        # Build our footer message
        footer = self.buildFooter(article.siteName)
        footerIcon = self.getFooterIcon(article.siteName)
        embed.set_footer(icon_url=footerIcon, text=footer)

        embed.set_color(color=self.getEmbedColor(article.siteName))

        hook.add_embed(embed)
        self.tempMessage = hook

    def sendMessage(self, article: DiscordQueue) -> List[Response]:
        if article.title != "":
            self.logger.info(f"Discord - Sending article '{article.title}'")
        else:
            self.logger.info(
                f"Discord - Sending article '{article.description}'")
        self.buildMessage(article)
        try:
            res = self.tempMessage.execute()
        except Exception as e:
            self.logger.critical(
                f"Failed to send to Discord.  Check to ensure the webhook is correct. Error: {e}"
            )
            return list()

        hooks: int = len(self.getHooks(article.siteName))

        # Checking to see if we returned a single response or multiple.
        if hooks == 1:
            responses = list()
            responses.append(res)
        else:
            responses = res

        return responses

    def getHooks(self, newsSource: str) -> List[str]:
        hooks = list()
        try:
            dwh = DiscordWebHooks(name=newsSource).findAllByName()
            for i in dwh:
                hooks.append(i.key)
            return hooks
        except Exception as e:
            self.logger.critical(
                f"Unable to find DiscordWebhook for {newsSource}. Error: {e}")
            return list()

    def convertFromHtml(self, msg: str) -> str:
        msg = msg.replace("<h2>", "**")
        msg = msg.replace("</h2>", "**")
        msg = msg.replace("<h3>", "**")
        msg = msg.replace("</h3>", "**\r\n")
        msg = msg.replace("<strong>", "**")
        msg = msg.replace("</strong>", "**\r\n")
        msg = msg.replace("<ul>", "\r\n")
        msg = msg.replace("</ul>", "")
        msg = msg.replace("</li>", "\r\n")
        msg = msg.replace("<li>", "> ")
        msg = msg.replace("&#8220;", '"')
        msg = msg.replace("&#8221;", '"')
        msg = msg.replace("&#8230;", "...")
        msg = msg.replace("<b>", "**")
        msg = msg.replace("</b>", "**")
        msg = msg.replace("<br>", "\r\n")
        msg = msg.replace("<br/>", "\r\n")
        msg = msg.replace("\xe2\x96\xa0", "*")
        msg = msg.replace("\xa0", "\r\n")
        msg = msg.replace("<p>", "")
        msg = msg.replace("</p>", "\r\n")

        msg = self.replaceLinks(msg)
        return msg

    def replaceLinks(self, msg: str) -> str:
        """
        Find the HTML links and replace them with something discord supports.
        """
        # links = re.findall("(?<=<a )(.*)(?=</a>)", msg)
        msg = msg.replace("'", '"')
        links = re.findall("<a(.*?)a>", msg)
        for l in links:
            hrefs = re.findall('href="(.*?)"', l)
            texts = re.findall(">(.*?)</", l)
            if len(hrefs) >= 1 and len(texts) >= 1:
                discordLink = f"[{texts[0]}]({hrefs[0]})"
                msg = msg.replace(f"<a{l}a>", discordLink)
        return msg

    def replaceImages(self, msg: str) -> str:
        imgs = re.findall("<img (.*?)>", msg)
        for i in imgs:
            # Removing the images for now.
            # src = re.findall('src=(.*?)">', i)
            replace = f"<img {i}>"
            msg = msg.replace(replace, "")
        return msg

    def getAuthorIcon(self, authorIcon: str, siteName: str) -> str:
        if authorIcon != "":
            return authorIcon
        else:
            if (siteName == "Final Fantasy XIV"
                    or siteName == "Phantasy Star Online 2"
                    or siteName == "Pokemon Go Hub"):
                res = Icons(site=f"Default {siteName}").findAllByName()
                return res[0].filename
            else:
                s: List[str] = siteName.split(" ")
                if s[0] == "RSS":
                    # res = Icons(site=f"Default {s[1]}").findAllByName()
                    res = Icons(site=siteName).findAllByName()
                else:
                    res = Icons(site=f"Default {s[0]}").findAllByName()
                return res[0].filename

    def buildFooter(self, siteName: str) -> str:
        footer = ""
        end: str = "Brought to you by NewsBot"
        if "reddit" in siteName.lower():
            s = siteName.split(" ")
            footer = f"{end}"
        elif "Youtube" in siteName:
            s = siteName.split(" ")
            footer = f"{s[1]} - {end}"
        elif "Instagram" in siteName or "Twitter" in siteName:
            s = siteName.split(" ")
            if s[1] == "tag":
                footer = f"#{s[2]} - {end}"
            elif s[1] == "user":
                footer = f"{s[2]} - {end}"
        elif "RSS" in siteName:
            s = siteName.split(" ")
            footer = f"{s[1]} - {end}"
        else:
            footer = end

        return footer

    def getFooterIcon(self, siteName: str) -> str:
        if (siteName == "Phatnasy Star Online 2"
                or siteName == "Pokemon Go Hub"
                or siteName == "Final Fantasy XIV"):
            res = Icons(site=f"Default {siteName}").findAllByName()
            return res[0].filename
        else:
            s: List[str] = siteName.split(" ")
            values = (f"Default {s[1]}", f"Default {s[0]}", siteName)
            for v in values:
                r = Icons(site=v).findAllByName()
                if len(r) == 1:
                    res = r
            # if s[0].lower() == 'rss':
            #    res = Icons(site=f"Default {s[1]}").findAllByName()
            # else:
            #    res = Icons(site=f"Default {s[0]}").findAllByName()

            try:
                if res[0].filename != "":
                    return res[0].filename
                else:
                    return ""
            except:
                return ""

    def getEmbedColor(self, siteName: str) -> int:
        # Decimal values can be collected from https://www.spycolor.com
        if "Reddit" in siteName:
            return 16395272
        elif "YouTube" in siteName:
            return 16449542
        elif "Instagram" in siteName:
            return 13303930
        elif "Twitter" in siteName:
            return 1937134
        elif "Final Fantasy XIV" in siteName:
            return 11809847
        elif "Pokemon Go Hub" in siteName:
            return 2081673
        elif "Phantasy Star Online 2" in siteName:
            return 5557497
        elif "Twitch" in siteName:
            return 9718783
        else:
            return 0
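
A standalone sketch of the embed that buildMessage() assembles, using the discord_webhook package; the webhook URL is a placeholder:

from discord_webhook import DiscordEmbed, DiscordWebhook

hook = DiscordWebhook(url="https://discord.com/api/webhooks/<id>/<token>")  # placeholder

embed = DiscordEmbed(title="Example article")
embed.description = "Short article summary goes here."
embed.add_embed_field(name="Link:", value="https://example.com/post/1")
embed.set_footer(text="Brought to you by NewsBot")
embed.set_color(color=16395272)

hook.add_embed(embed)
res = hook.execute()  # sendMessage() above normalizes this into a list of responses
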
Example #9
class RssReader(ISources, BSources):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri = "https://example.net/"
        self.siteName: str = "RSS"
        self.feedName: str = ""
        self.RssHelper: IRssContent = IRssContent()
        self.links: List[Sources] = list()
        self.hooks: List[DiscordWebHooks] = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.checkEnv(self.siteName)
        pass
   
    def getArticles(self) -> List[Articles]:
        allArticles: List[Articles] = list()
        for l in self.links:
            l: Sources = l

            # Check if this source was disabled in the previous run
            if l.enabled == False:
                continue

            self.logger.debug(f"{l.name} - Checking for updates")
            self.feedName = l.name.split(" ")[1]

            # Cache the root site
            self.uri = l.url
            rsc = RequestSiteContent(url=l.url)
            rsc.getPageDetails()

            # Check if the site icon has been cached
            iconsExists = Icons(site=l.name).findAllByName()
            if len(iconsExists) == 0:
                siteIcon: str = rsc.findSiteIcon(l.url)
                Icons(site=l.name, fileName=siteIcon).update()

            # Check if we have helper code for deeper RSS integration
            # hasHelper: bool = self.enableHelper(l.url)

            # Determine what type of feed is on the site
            feed = rsc.findFeedLink(siteUrl=l.url)
            if feed["type"] == "atom":
                ap = AtomParser(url=feed["content"], siteName=l.name)
                items = ap.getPosts()
                for i in items:
                    a: Articles = ap.parseItem(i)
                    if a.title != "":
                        allArticles.append(a)

            elif feed["type"] == "rss":
                rp = RssParser(url=feed["content"], siteName=l.name)
                items = rp.getPosts()
                for item in items:
                    a = rp.processItem(item=item, title=l.name)
                    if a.title != "":
                        allArticles.append(a)

            elif feed["type"] == "json":
                jp = JsonParser(url=feed["content"], siteName=l.name)
                items = jp.getPosts()
                for i in items:
                    a: Articles = jp.parseItem(i)
                    if a.title != "":
                        allArticles.append(a)

            else:
                # Unable to find a feed in the page's source code.
                # Assuming that it is RSS.
                rp = RssParser(url=l.url, siteName=l.name)
                items = rp.getPosts()
                if len(items) >= 1:
                    for item in items:
                        a = rp.processItem(item=item, title=l.name)
                        if a.title != "":
                            allArticles.append(a)
                else:
                    self.logger.error(
                        f"Unable to find a feed for '{l.name}'.  This source is getting disabled."
                    )
                    for link in self.links:
                        link: Sources = link
                        if link.name == l.name:
                            link.enabled = False

        return allArticles

    def enableHelper(self, url: str) -> bool:
        r: bool = False
        if "engadget.com" in url:
            self.RssHelper = Engadget()
            r = True
        elif "arstechnica" in url:
            self.RssHelper = ArsTechnica()
            r = True
        elif "howtogeek" in url:
            self.RssHelper = HowToGeek()
            r = True
        return r
Example #10
class InstagramReader(ISources, BSources, BChrome):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri = "https://www.instagram.com/"
        self.baseUri = self.uri
        self.siteName: str = "Instagram"
        self.links: List[Sources] = list()
        self.hooks: List[DiscordWebHooks] = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.currentLink: Sources = Sources()
        self.checkEnv(self.siteName)
        pass

    def getArticles(self) -> List[Articles]:
        self.driver = self.driverStart()
        allArticles: List[Articles] = list()

        for site in self.links:
            self.currentLink = site

            nameSplit = site.name.split(" ")
            igType = nameSplit[1]
            self.siteName = f"Instagram {nameSplit[2]}"
            self.logger.debug(f"Instagram - {nameSplit[2]} - Checking for updates.")
            
            #self.uri = f"{self.baseUri}directory/hashtags/"
            self.uri = f"https://www.instagram.com/directory/profiles/0-0/"
            self.driverGoTo(self.uri)

            # Figure out if we are looking for a user or tag
            links: List[str] = list()
            if igType == "user":
                #self.uri = f"{self.baseUri}{nameSplit[2]}"
                WebDriverWait(driver=self.driver, timeout=5)
                self.driver.save_screenshot('ig_hashtag.png')
                res = self.driver.find_element_by_xpath('/html/body/div[1]/section/nav/div[2]/div/div/div[2]/div/div/span[2]')
                links = self.getUserArticleLinks()
            elif igType == "tag":
                self.uri = f"{self.baseUri}explore/tags/{nameSplit[2]}/"
                self.driverGoTo(self.uri)
                links = self.getTagArticleLinks()

            for l in links:
                # check if we have already seen the url
                a = Articles(url=l)
                if a.exists() == False:
                    # Get the content
                    allArticles.append(self.getPostInfo(l))

            self.logger.debug(f"{self.siteName} - Finished checking.")
            try:
                pass
            except Exception as e:
                self.logger.error(
                    f"Failed to parse articles from {self.siteName}.  Chances are we have a malformed responce. {e}"
                )

        self.driverClose()
        self.siteName = "Instagram"

        return allArticles

    def getUserArticleLinks(self) -> List[str]:
        """
        This reviews a user's page to find all the links that relate to each post they have made.
        """
        links = list()
        try:
            #source = self.getContent()
            soup: BeautifulSoup = self.getParser(requestsContent=self.getContent())
            res = soup.find_all(name="article")
            for i in res[0].contents[0].contents[0].contents:
                for l in i.contents:
                    links.append(
                        f"https://www.instagram.com{l.contents[0].attrs['href']}"
                    )

        except Exception as e:
            self.logger.error(e)
            self.driverClose()

        return links

    def getTagArticleLinks(self) -> List[str]:
        """
        This checks the tag for the newest posts.
        """
        links = list()

        try:
            #source: str = self.getContent()
            soup: BeautifulSoup = self.getParser(requestsContent=self.getContent())
            res = soup.find_all(name="article")

            # Top Posts
            links = self.getArticleLinks(
                res[0].contents[0].contents[1].contents[0].contents, links
            )

            # Recent
            # TODO Need a way to define options on Instagram Tags.  One might not want EVERYTHING.
            # links = self.getArticleLinks(res[0].contents[2].contents[0].contents, links)

        except Exception as e:
            self.logger.error(f"Driver ran into a problem pulling links from a tag. {e}")

        return links

    def getArticleLinks(self, soupList: List, linkList: List) -> List[str]:

        for i in soupList:
            try:
                for l in i.contents:
                    linkList.append(
                        f"https://www.instagram.com{l.contents[0].attrs['href']}"
                    )
            except Exception as e:
                self.logger.error(f"Failed to extract post link. {e}")
        return linkList

    def getPostInfo(self, link: str) -> Articles:
        a = Articles(url=link, siteName=self.currentLink.name, tags="instagram, posts")

        self.driverGoTo(link)
        #source = self.getContent()
        soup = self.getParser(requestsContent=self.getContent())

        nameSplit = self.currentLink.name.split(" ")
        if nameSplit[1] == "tag":
            a.tags += f", tag, {nameSplit[2]}"
        elif nameSplit[1] == "user":
            a.tags += f", user, {nameSplit[2]}"

        # Get the title from the post
        title = soup.find_all(name="span", attrs={"class", ""})

        # Get the poster Avatar
        authorImages = soup.find_all(name="img")
        for i in authorImages:
            try:
                if "profile picture" in i.attrs["alt"]:
                    a.authorImage = i.attrs["src"]
                    break
            except:
                pass

        # get posters name
        authorName = soup.find_all(
            name="a", attrs={"class": "sqdOP yWX7d _8A5w5 ZIAjV"}
        )
        a.authorName = authorName[0].text

        # Check the title to make sure it was not just all tags... someone did that! - Done
        # TODO Need a better placeholder value
        cleanTitle = self.cleanTitle(title[1].text)
        if cleanTitle == "":
            a.title = "Instagram Post"
        else:
            a.title = cleanTitle

        # improve the regex to collect tags.  It nuked out a title... oops - Made an adjustment
        tags = self.getTags(title[1].text)
        if tags != "":
            a.tags = tags

        # Get when the post went up
        dt = soup.find_all(name="time", attrs={"class": "FH9sR Nzb55"})
        a.pubDate = dt[0].attrs["datetime"]

        # Video link
        hasVideo = soup.find_all(
            name="span", attrs={"class": "qBUYS _7CSz9 FGFB7 videoSpritePlayButton"}
        )
        hasCollection = soup.find_all(name="button", attrs={"class": "_6CZji"})
        if len(hasVideo) >= 1:
            video = soup.find(name="video", attrs={"class": "tWeCl"})
            a.description = "This post contains a video, view it online!"
            a.video = video.attrs["src"]

        # check if it contains multiple pictures
        elif len(hasCollection) >= 1:
            a.description = "This post contains multiple pictures, view them online!"
            a.thumbnail = self.getPicture(soup)
            # TODO Figure out if the collection can be stored.
            # Its not like Discord can present them all with a single post.
            # self.getCollection(soup)

        # Get a single picture
        else:
            a.thumbnail = self.getPicture(soup)
        return a

    def getPicture(self, soup: BeautifulSoup) -> str:
        images = soup.find_all(name="img")
        for i in images:
            try:
                if "photo by " in i.attrs["alt"].lower():
                    return i.attrs["src"]
                elif "photo shared by" in i.attrs["alt"].lower():
                    return i.attrs["src"]
            except:
                pass

        # Checking for images that have people/objects tagged
        for i in soup.find_all(name="img", attrs={"class": "FFVAD"}):
            # we are just going to take the first one that shows up in the list.
            return i.attrs["src"]

    def getTags(self, text: str) -> str:
        t = ""
        # res = findall('#[a-zA-Z0-9].*', text)
        res = findall("[#](.*?)[ ]", text)
        if len(res) >= 1:
            # tags = res[0].split('#')
            for i in res:
                try:
                    i: str = i.replace(" ", "")
                    if i != "":
                        t += f"{i}, "
                except:
                    pass
        return t

    def cleanTitle(self, text: str) -> str:
        """
        This will check the text given for Instagram tags. If they are found, remove them.
        If no tags are found from regex, it will return the given text.
        """
        t = ""
        res = findall("#[a-zA-Z0-9].*", text)
        if len(res) >= 1:
            t = text.replace(res[0], "")
            return t
        else:
            return text
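
The caption helpers are easy to exercise locally; this small demo shows what getTags() and cleanTitle() produce for a typical caption (note the last tag is only picked up when it is followed by a space, which mirrors the regex above):

from re import findall

caption = "Sunset over the bay #sunset #photography #nofilter"

tags = findall("[#](.*?)[ ]", caption)           # ['sunset', 'photography']
tagBlock = findall("#[a-zA-Z0-9].*", caption)    # ['#sunset #photography #nofilter']
title = caption.replace(tagBlock[0], "").strip()  # 'Sunset over the bay'
print(tags, title)
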
Example #11
class PSO2Reader(ISources, BSources):
    def __init__(self) -> None:
        self.logger = Logger(__class__)
        self.uri: str = "https://pso2.com/news"
        self.siteName: str = "Phantasy Star Online 2"
        self.authorName: str = f"{self.siteName} Offical Site"
        self.links = list()
        self.hooks = list()
        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False
        self.checkEnv(self.siteName)
        pass

    def getArticles(self) -> List[Articles]:
        allArticles: List[Articles] = list()
        for site in self.links:
            self.logger.debug(f"{site.name} - Checking for updates.")
            self.uri = site.url

            siteContent: Response = self.getContent()
            if siteContent.status_code != 200:
                self.logger.error(
                    f"The returned content from {self.siteName} is either malformed or incorrect.  We got the wrong status code.  Expected 200 but got {siteContent.status_code}"
                )
            page: BeautifulSoup = self.getParser(requestsContent=siteContent)

            try:
                for news in page.find_all("li", {"class", "news-item all sr"}):
                    a = Articles(siteName=self.siteName, authorName=self.authorName)
                    # a.siteName = "Phantasy Star Online 2"
                    a.thumbnail = re.findall(
                        "url[(](.*?)[)]", news.contents[1].attrs["style"]
                    )[0]

                    nc = news.contents[3].contents
                    a.title = nc[1].text
                    a.description = nc[3].text

                    bottom = nc[5].contents
                    a.tags = bottom[1].text
                    a.pubDate = bottom[5].text

                    link = re.findall(
                        r"ShowDetails\('(.*?)'", bottom[7].attrs["onclick"],
                    )[0]
                    # tells us the news type and news link
                    cat = bottom[1].text.lower()
                    if " " in cat:
                        cat = cat.replace(" ", "-")

                    a.url = f"{self.uri}/{cat}/{link}"

                    allArticles.append(a)
            except UnableToFindContent as e:
                self.logger.error(f"PSO2 - Unable to find articles. {e}")

        self.logger.debug(f"{site.name} - Finished collecting articles")
        return allArticles

    def findNewsLinks(self, page: BeautifulSoup) -> BeautifulSoup:
        try:
            news = page.find_all(
                "ul", {"class", "news-section all-news announcement-section active"}
            )
            if len(news) != 1:
                self.logger.error(
                    f"Collected results from news-section but got more results than expected."
                )

            return news
        except Exception as e:
            self.logger.error(
                f"Failed to find news-section.  Did the site layout change? {e}"
            )

    def findListItems(self, news: BeautifulSoup) -> BeautifulSoup:
        try:
            for article in news.find_all("li", {"class", "news-item all sr"}):
                print(article)
            pass
        except UnableToFindContent as e:
            self.logger.error(f"{e}")
Example #12
class YoutubeReader(ISources, BSources, BChrome):
    def __init__(self):
        self.logger = Logger(__class__)
        self.uri: str = "https://youtube.com"
        self.siteName: str = "Youtube"
        self.feedBase: str = "https://www.youtube.com/feeds/videos.xml?channel_id="

        self.links: List[Sources] = list()
        self.hooks: List[DiscordWebHooks] = list()

        self.sourceEnabled: bool = False
        self.outputDiscord: bool = False

        self.checkEnv(self.siteName)
        pass

    def getArticles(self) -> List[Articles]:
        self.logger.debug(f"Checking YouTube for new content")
        self.driver = self.driverStart()

        allArticles: List[Articles] = list()

        for site in self.links:
            s = site.name.split(" ")
            self.authorName = ""
            self.authorImage = ""
            self.logger.debug(f"{site.name} - Checking for updates")

            # pull the source code from the main youtube page
            channelID = Cache(key=f"youtube {s[1]} channelID").find()
            if channelID == "":
                self.uri = f"{site.url}"
                self.driverGoTo(self.uri)
                #self.driver.save_screenshot("youtube_step1.png")
                siteContent: str = self.driverGetContent()
                page: BeautifulSoup = self.getParser(seleniumContent=siteContent)
                channelID: str = self.getChannelId(page)
                Cache(key=f"youtube {s[1]} channelID", value=channelID).add()

                # Not finding the values I want with just request.  Time for Chrome.
                # We are collecting info that is not present in the RSS feed.
                # We are going to store them in the class.
                try:
                    authorImage = page.find_all(name="img", attrs={"id": "img"})
                    self.authorImage = authorImage[0].attrs["src"]
                    Cache(
                        key=f"youtube {s[1]} authorImage", value=self.authorImage
                    ).add()
                except Exception as e:
                    self.logger.error(
                        f"Failed to find the authorImage for {s[1]}.  CSS might have changed. {e}"
                    )
                authorImage.clear()

                try:
                    authorName = page.find_all(
                        name="yt-formatted-string",
                        attrs={"class": "style-scope ytd-channel-name", "id": "text"},
                    )
                    self.authorName = authorName[0].text
                    Cache(key=f"youtube {s[1]} authorName", value=self.authorName).add()
                except Exception as e:
                    self.logger.error(
                        f"Failed to find the authorName for {s[1]}.  CSS might have changed. {e}"
                    )
                authorName.clear()
            else:
                self.authorName = Cache(key=f"youtube {s[1]} authorName").find()
                self.authorImage = Cache(key=f"youtube {s[1]} authorImage").find()

            # Generate the hidden RSS feed URI
            self.uri = f"{self.feedBase}{channelID}"
            siteContent = self.getContent()
            page = self.getParser(siteContent)

            root = page.contents[2].contents
            for item in root:
                if item.name == "entry":
                    a = Articles()
                    a.url = item.contents[9].attrs["href"]
                    a.video = a.url
                    a.title = item.contents[7].text
                    a.pubDate = item.contents[13].text
                    a.siteName = site.name
                    a.thumbnail = item.contents[17].contents[5].attrs["url"]
                    a.authorImage = self.authorImage
                    a.authorName = self.authorName

                    allArticles.append(a)

        self.driverClose()
        return allArticles

    def getChannelId(self, page: BeautifulSoup) -> str:
        # siteContent: Response = self.getContent()
        # page: BeautifulSoup = self.getParser(siteContent)

        meta = page.find_all("meta")
        for i in meta:
            try:
                if i.attrs["itemprop"] == "channelId":
                    channelId = i.attrs["content"]
                    return channelId
            except:
                pass

        return ""