import requests
from GoogleNews import GoogleNews


def googleLinks(topic):
    # Fetch news on the topic from the last day and resolve the first
    # five Google redirect links to their final article URLs.
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('1d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(topic)
    links = googlenews.get_links()[:5]
    actualLinks = []
    for link in links:
        link = "http://" + link
        print(link)
        # Follow redirects to obtain the article's final URL.
        actualLinks.append(requests.get(link).url)
    return actualLinks
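# A minimal usage sketch for googleLinks above; the topic string is an
# arbitrary example, and resolving the links performs live HTTP requests.
if __name__ == '__main__':
    for url in googleLinks('bitcoin'):
        print(url)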
from datetime import datetime

import tweepy
from django.shortcuts import render
from GoogleNews import GoogleNews


def googleNewsApi(request, word):
    # Google News: fetch English results for the search word from the
    # last seven days.
    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_period('7d')
    googlenews.set_encode('utf-8')
    googlenews.get_news(str(word))
    resultsGoogleNews = googlenews.results()

    # TWITTER -- placeholders only: real credentials must never be
    # hardcoded; load them from settings or the environment instead.
    consumer_key = 'YOUR_CONSUMER_KEY'
    consumer_secret = 'YOUR_CONSUMER_SECRET'
    access_token = 'YOUR_ACCESS_TOKEN'
    access_token_secret = 'YOUR_ACCESS_TOKEN_SECRET'

    # Create the OAuthHandler object and set the access token and secret.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    # Create the tweepy API object used to fetch tweets (tweepy 3.x,
    # where API.search and the 'since' parameter exist).
    api = tweepy.API(auth)

    date_since = datetime.today().strftime('%Y-%m-%d')
    print(date_since)
    # Fetch up to 100 English tweets mentioning the word since today.
    tweets = tweepy.Cursor(api.search, q=str(word), lang="en",
                           since=date_since).items(100)

    return render(request, 'homepage.html',
                  {'news': resultsGoogleNews, 'tweets': tweets})
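# A hypothetical URLconf entry for the googleNewsApi view above; the module
# path 'news.views' and the route itself are illustrative assumptions, not
# taken from the original project. This would live in the project's urls.py.
from django.urls import path
from news import views

urlpatterns = [
    path('news/<str:word>/', views.googleNewsApi, name='google-news'),
]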
import random

from GoogleNews import GoogleNews


def GNews():
    gn = GoogleNews()
    gn.set_period('7d')
    topics = [
        "INDIA", "USA", "UK", "AUSTRALIA", "FRANCE",
        "UGANDA", "PAKISTAN", "MALDIVES", "CELEBRITY",
    ]
    # Search a randomly chosen topic and concatenate title, description,
    # and link of every result into a single string.
    gn.search(random.choice(topics))
    data = ""
    for item in gn.results():
        data += item['title']
        data += item['desc']
        data += item['link']
    return data
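# Usage sketch: GNews() picks a random topic and returns one concatenated
# string of titles, descriptions, and links for that topic's results.
if __name__ == '__main__':
    print(GNews())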
import pandas as pd
import psycopg2
from GoogleNews import GoogleNews
from newspaper import Config


def scrape_the_news():
    user_agent = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/50.0.2661.102 Safari/537.36')
    # newspaper3k download config; prepared here but not used below.
    config = Config()
    config.browser_user_agent = user_agent

    # NLP_news() is defined elsewhere in the project and supplies the
    # ranked list of topics; only the top topic is scraped.
    topiclist = NLP_news()
    print(topiclist[0])

    googlenews = GoogleNews()
    googlenews.set_lang('en')
    googlenews.set_encode('utf-8')
    googlenews.set_period('7d')
    googlenews.get_news(topiclist[0])
    result = googlenews.results()
    googlenews.clear()

    # Normalise the result columns before inserting into Postgres.
    df = pd.DataFrame(result)
    df = df.drop(['date', 'media'], axis=1)
    df.columns = ['Date', 'Summary', 'Image', 'Link', 'Site', 'Title']
    df = df[['Title', 'Summary', 'Image', 'Link', 'Date', 'Site']]

    conn = psycopg2.connect("dbname=EdTech user=postgres password=edtech123")
    curr = conn.cursor()
    for i, row in df.iterrows():
        try:
            row.Link = 'https://' + row.Link
            values = [row[column] for column in row.keys()]
            insert_statement = (
                "INSERT INTO scrapenews_newslist VALUES "
                "(nextval('scrapenews_newslist_id_seq'::regclass), "
                "%s, %s, %s, %s, %s, %s)")
            curr.execute(insert_statement, tuple(values))
        except Exception:
            print('could not add row', i)
    conn.commit()
    curr.close()
    conn.close()
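# A hypothetical setup sketch for the table the INSERT above expects; the
# column names and text types are assumptions inferred from the DataFrame
# ordering, not taken from the original (Django-managed) schema.
import psycopg2


def create_news_table():
    conn = psycopg2.connect("dbname=EdTech user=postgres password=edtech123")
    curr = conn.cursor()
    curr.execute("CREATE SEQUENCE IF NOT EXISTS scrapenews_newslist_id_seq")
    curr.execute("""
        CREATE TABLE IF NOT EXISTS scrapenews_newslist (
            id      integer PRIMARY KEY
                    DEFAULT nextval('scrapenews_newslist_id_seq'),
            title   text,
            summary text,
            image   text,
            link    text,
            date    text,
            site    text
        )
    """)
    conn.commit()
    curr.close()
    conn.close()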
from GoogleNews import GoogleNews


def googlenews_recovery(app_config):
    # Build a GoogleNews client from an application config mapping and
    # run the configured search before returning it.
    googlenews = GoogleNews()
    googlenews.set_lang(app_config["lang"])
    googlenews.set_period(app_config["period"])
    googlenews.get_news(app_config["keywords"])
    return googlenews
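# Usage sketch: the dict keys mirror those read by googlenews_recovery;
# the values are arbitrary examples.
if __name__ == '__main__':
    client = googlenews_recovery({"lang": "en", "period": "7d",
                                  "keywords": "bitcoin"})
    for item in client.results():
        print(item['title'])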
@author: Daten Master
'''
from GoogleNews import GoogleNews

googlenews = GoogleNews()

##############################################################
#################### Define the search #######################
##############################################################
googlenews.set_encode('utf-8')
# Set the language (e.g. 'de' = German; 'en' = English; ...)
googlenews.set_lang('de')
# Filter by period (e.g. news no older than one day)
googlenews.set_period('1d')
# googlenews.set_time_range('15/01/2021', '17/01/2021')
# Run the search
googlenews.get_news('Wetter Hamburg')

##############################################################
########################## Output ############################
##############################################################
# All fields (title, description, time, date, link, source)
# print(googlenews.results())
# Iterate over the news headlines
# for i in googlenews.results():
#     print(i['title'])
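# Live-output sketch using the fields the comment above lists; the key
# names ('date', 'title', 'link') match the dicts googlenews.results()
# returns.
for entry in googlenews.results():
    print(entry['date'], entry['title'], entry['link'])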