def RunExtraction(self, language: str = 'ar') -> str:
    """Fetch Sky News search results, record them, dispatch and persist them.

    Only ``language == 'ar'`` (case-insensitive) is handled; any other value
    does nothing.  Every entry of the response's ``contentItems`` list is
    appended to ``self.Results["SkyNews"]`` and handed to ``SendToChannel`` on
    its own thread.  Afterwards ``self.Results`` is passed to ``write_json``.

    NOTE(review): the source this was rebuilt from had lost its line breaks.
    This assumes the ``SendToChannel`` thread is started for every item (not
    only when ``config.DEBUG`` is set) and that ``write_json`` runs once after
    the loop, inside the ``'ar'`` branch -- confirm against the original file.

    Args:
        language: Language code selecting the search endpoint.

    Returns:
        Always the empty string.
    """
    if language.lower() != 'ar':
        return ''

    json_data = self.MakeRequest(target=self.AR_searchEngine, json=True)
    # A missing response or a response without 'contentItems' means "no
    # items" rather than a TypeError/AttributeError while iterating.
    content_items = (json_data or {}).get('contentItems') or []

    for data in content_items:
        title = data.get('headline')
        published_date = data.get('date')
        category = data.get('category')
        link = self.CreateNewsLink(
            data.get('id'),
            data.get('sectionUrl'),
            data.get('urlFriendlySuffix'),
        )
        self.Results.get("SkyNews").append({
            'title': title,
            'published_date': published_date,
            'category': category,
            'link': link,
        })
        if config.DEBUG:
            print("title: ", title)
            print("published date: ", published_date)
            print("category: ", category)
            print("Link: ", link)
        # Send News to telegram
        threading.Thread(
            target=SendToChannel,
            args=(title, published_date, category, link),
        ).start()

    write_json(config.EnvironmentPath(), 'skynews', self.Results)
    return ''
def performDataExtraction(self, links: list):
    """Run ``self.extractData`` on every link concurrently, then persist.

    One thread is created per entry in ``links``.  All threads are started,
    then all are joined, and finally ``self.ResultsData`` is passed to
    ``write_json`` under the ``'alarabiya'`` name.

    Args:
        links: Links to hand to ``self.extractData``, one per thread.
    """
    # Each link goes straight to its worker thread; routing it through a
    # local queue (put immediately followed by get) added nothing.
    workers = [
        threading.Thread(target=self.extractData, args=(link,))
        for link in links
    ]
    for worker in workers:
        worker.start()
    # Wait for every worker so the results are complete before writing.
    for worker in workers:
        worker.join()
    write_json(config.EnvironmentPath(), 'alarabiya', self.ResultsData)
def performDataExtraction(self, links: list):
    """Run ``self.extractData`` on every link concurrently, then persist.

    One thread is created per entry in ``links``; all are started and then
    joined.  An ``Exception`` raised while setting up, starting or joining the
    threads is reported through ``config.debug`` and not propagated.
    ``self.Results`` is passed to ``write_json`` under the ``'rt'`` name in
    every case.

    Args:
        links: Links to hand to ``self.extractData``, one per thread.
    """
    try:
        # Each link goes straight to its worker thread; routing it through a
        # local queue (put immediately followed by get) added nothing.
        workers = [
            threading.Thread(target=self.extractData, args=(link,))
            for link in links
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt and SystemExit must
        # reach the caller instead of being logged and swallowed here.
        config.debug(level=1, data=e)
    finally:
        # Persist whatever was collected, even when an interrupt propagates.
        write_json(config.EnvironmentPath(), 'rt', self.Results)
def EN_CNN_Search(self, query: str):
    """Query the English CNN search endpoint and record the returned stories.

    The stripped ``query`` is formatted into ``self.API_CNN_EN`` and fetched
    with ``self.MakeRequest``.  Each entry of the response's ``result`` list is
    appended to ``self.Results["cnn"]`` as a dict with ``title``, ``tags``,
    ``published_date`` and ``link`` keys.  ``self.Results`` is then passed to
    ``write_json``.

    Args:
        query: Search text; surrounding whitespace is removed before use.
    """
    response = self.MakeRequest(
        target=self.API_CNN_EN.format(query.strip()),
        json=True,
    )
    # A missing response or a response without 'result' means "no stories"
    # rather than a TypeError/AttributeError while iterating.
    stories = (response or {}).get('result') or []

    for news in stories:
        self.Results.get("cnn").append({
            'title': news.get('headline'),
            'tags': news.get('section'),
            'published_date': news.get('firstPublishDate'),
            'link': news.get('url'),
        })

    write_json(config.EnvironmentPath(), 'cnn', self.Results)
def parseResults(self):
    """Parse the API response and record every news item it contains.

    The object returned by ``self.convertToJson()`` is expected to provide an
    ``items`` list.  Items for which
    ``self.Ensure_Rules(link=..., rule='category')`` is truthy are skipped.
    For the rest, the link, title, creation date and tags are appended to
    ``self.Results["foxnews"]``.

    An ``Exception`` raised while parsing is printed and not propagated; it
    ends the loop, so later items are not processed.  ``self.Results`` is
    passed to ``write_json`` in every case.
    """
    try:
        # json object from API response
        json_data = self.convertToJson()
        # A response without 'items' is treated as an empty result set.
        for item in json_data.get('items') or []:
            news_link = item.get('link')
            # checks if the link is a news and not anything else
            if self.Ensure_Rules(link=news_link, rule='category'):
                continue

            # All remaining metadata lives in the first 'metatags' entry.
            metatags = item.get('pagemap').get('metatags')[0]
            news_title = item.get('title')
            news_published_date = metatags.get('dcterms.created')

            # Get news category using two possible dict keys:
            # 'classification-tags' when it has a value, otherwise
            # 'classification-isa'.  Only string-like values are split.
            raw_tags = metatags.get('classification-tags')
            if not raw_tags:
                raw_tags = metatags.get('classification-isa')
            news_tags = raw_tags.split(',') if hasattr(raw_tags, 'split') else None

            self.Results.get("foxnews").append({
                'title': news_title,
                'published_date': news_published_date,
                'category': news_tags,
                'link': news_link,
            })

            # Print data to user
            if config.DEBUG:
                print("Link", news_link)
                print("Title", news_title)
                print("Categories", news_tags)
                print("Published date:", news_published_date)
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt and SystemExit must
        # reach the caller instead of being printed and swallowed here.
        print(e)
    finally:
        # Persist whatever was collected, even when an interrupt propagates.
        write_json(config.EnvironmentPath(), 'foxnews', self.Results)
def performDataExtraction(self, links: list, language):
    """Run ``self.extractData`` on every link concurrently, then persist.

    One thread is created per entry in ``links``, each calling
    ``self.extractData(link, language)``; all are started and then joined.
    An ``Exception`` raised while setting up, starting or joining the threads
    is printed and not propagated.  ``self.ResultsData`` is passed to
    ``write_json`` under the ``'aljazeera'`` name in every case.

    Args:
        links: Links to hand to ``self.extractData``, one per thread.
        language: Forwarded unchanged as the second argument of
            ``self.extractData``.
    """
    try:
        # Each link goes straight to its worker thread; routing it through a
        # local queue (put immediately followed by get) added nothing.
        workers = [
            threading.Thread(target=self.extractData, args=(link, language))
            for link in links
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt and SystemExit must
        # reach the caller instead of being printed and swallowed here.
        print(e)
    finally:
        # Persist whatever was collected, even when an interrupt propagates.
        write_json(config.EnvironmentPath(), 'aljazeera', self.ResultsData)