def main(database: str, url_list_file: str):
    """Scrape words from every URL listed in *url_list_file* and persist them.

    :param database: filename of the SQLite database file to create/use.
    :param url_list_file: path to a text file listing URLs, one per line.
    """
    big_word_list = []
    print("we are going to work with " + database)
    print("we are going to scan " + url_list_file)
    urls = url_utilities.load_urls_from_file(url_list_file)
    for url in urls:
        print("reading " + url)
        page_content = url_utilities.load_page(url=url)
        words = url_utilities.scrape_page(page_contents=page_content)
        big_word_list.extend(words)
    print(len(big_word_list))
    # database code
    # make sure the db file is in the same directory of the .py file
    os.chdir(os.path.dirname(__file__))
    # BUG FIX: the caller-supplied `database` name was silently ignored and the
    # path was hard-coded to "words.db"; callers that pass "words.db" see
    # identical behavior.
    path = os.path.join(os.getcwd(), database)
    database_utilities.create_database(database_path=path)
    database_utilities.save_words_to_database(database_path=path, word_list=big_word_list)
    # NOTE(review): the lines below look unrelated to the scraping flow
    # (possibly pasted in); kept to preserve behavior — confirm whether
    # reading docfile.docx belongs in this function.
    filename = 'docfile.docx'
    text_data = read_word.getText(filename)
    print(text_data)
def main(database, url_list_file):
    """Collect words scraped from each URL in *url_list_file* and store them.

    At most the first 250,000 words are written to the database.
    """
    collected = []
    print('Db: ' + database)
    print('input list: ' + url_list_file)
    for url in url_utilities.load_urls_from_file(url_list_file):
        print('reading: ', url)
        content = url_utilities.load_page(url=url)
        collected.extend(url_utilities.scrape_page(page_contents=content))
    database_utilities.create_database(database)
    print('length words is: ', len(collected))
    # cap the insert at 250k words, exactly as the original did
    database_utilities.save_words_to_database(collected[:250000])
def main(database: str, url_list_file: str):
    """Scrape each listed URL for words and save them into words.db.

    :param database: database name (echoed to the console).
    :param url_list_file: path to a text file listing URLs, one per line.
    """
    word_accumulator = []
    print("we are going to work with " + database)
    print("we are going to scan " + url_list_file)
    for target in url_utilities.load_urls_from_file(url_list_file):
        print("reading " + target)
        content = url_utilities.load_page(url=target)
        word_accumulator.extend(url_utilities.scrape_page(content))
    # anchor the db file next to this script, then build an absolute path
    os.chdir(os.path.dirname(__file__))
    db_path = os.path.join(os.getcwd(), "words.db")
    database_utilities.create_database(database_path=db_path)
    database_utilities.save_words_to_database(database_path=db_path, words=word_accumulator)
def main(database: str, url_list_file: str):
    """Scrape words from every URL in *url_list_file* and persist them to words.db.

    :param database: database name (echoed to the console).
    :param url_list_file: path to a text file listing URLs, one per line.
    """
    big_word_list = []
    print('We are going to work with ' + database)
    print('We will scan ' + url_list_file)
    # BUG FIX: the helper is named load_urls_from_file (singular) in every
    # sibling variant of this function; "load_urls_from_files" would raise
    # AttributeError at runtime.
    urls = url_utilities.load_urls_from_file(url_list_file)
    for url in urls:
        print('Reading ' + url)
        page_content = url_utilities.load_page(url=url)
        words = url_utilities.scrape_page(page_contents=page_content)
        big_word_list.extend(words)
    # database code
    # keep the db file next to this script for cross-platform path stability
    os.chdir(os.path.dirname(__file__))
    path = os.path.join(os.getcwd(), 'words.db')
    database_utilities.create_database(database_path=path)
    database_utilities.save_words_to_database(database_path=path, words_list=big_word_list)
def main(database: str, url_list_file: str):
    """Read URLs from *url_list_file*, scrape their words, and store them in words.db.

    :param database: database name (echoed to the console).
    :param url_list_file: path to a text file listing URLs, one per line.
    """
    all_words = []
    print('we are going to work with ' + database)
    print('we are going to scan ' + url_list_file)
    url_list = url_utilities.load_urls_from_file(url_list_file)
    for current_url in url_list:
        print("reading " + current_url)
        html = url_utilities.load_page(url=current_url)
        scraped = url_utilities.scrape_page(page_contents=html)
        all_words.extend(scraped)
    # database code - mitigating cross-platform file path issues
    # __file__ is the location of the module currently executing, so the db
    # always lands next to this script regardless of the caller's cwd
    os.chdir(os.path.dirname(__file__))
    words_db = os.path.join(os.getcwd(), "words.db")
    database_utilities.create_database(database_path=words_db)
    database_utilities.save_words_to_database(database_path=words_db, words_list=all_words)
def main():
    """Fetch tweets for each handle in handles.txt, score sentiment, and save rows."""
    database_utilities.create_database()  # Creates the database
    # Open the file with twitter handles to query tweets.
    # BUG FIX: the original called open() OUTSIDE the try block, so a missing
    # file bypassed the handler entirely; it also used a bare `except:`, never
    # closed the file handle, and left `lines` unbound on failure (NameError
    # at the loop below). Now we open inside the try via a context manager,
    # catch only FileNotFoundError, and stop cleanly after reporting.
    try:
        with open('handles.txt', 'r') as twitter_handles_file:
            lines = twitter_handles_file.readlines()  # read the lines in handles.txt
    except FileNotFoundError:
        print("handles.txt not found in " + os.path.join(os.path.dirname(__file__), '..'))
        return
    # Saves Tweets to database, retrieving tweets (via API) from handle 1 at a time
    for handle in lines:
        tweets = got_tweet_methods.return_tweets(handle)  # return list of tweets from handle
        for tweet in tweets:
            b = TextBlob(tweet.full_text)  # TextBlob semantic info for the tweet text
            # [handle, text, timestamp, polarity] — sentiment[0] is the polarity score
            tweet_list = [handle, tweet.full_text, tweet.created_at, b.sentiment[0]]
            database_utilities.save_tweets_to_database(tweet_list)  # insert list into database as row
        print(handle + " records saved to database")
def main(database: str, url_list_file: str):
    """Scrape words from each URL in *url_list_file* and persist them to words.db.

    :param database: database name (echoed to the console).
    :param url_list_file: path to a text file listing URLs, one per line.
    """
    harvested = []
    print("We are going to work with " + database)
    print("We are going to scan " + url_list_file)
    for page_url in url_utilities.load_urls_from_file(url_list_file):
        print("reading " + page_url)
        markup = url_utilities.load_page(url=page_url)
        # scrape_page yields a raw list of words for this page
        harvested.extend(url_utilities.scrape_page(page_contents=markup))
    # database code
    # cross-platform deployment, so anchoring the db path next to the script matters
    os.chdir(os.path.dirname(__file__))
    target_db = os.path.join(os.getcwd(), "words.db")
    database_utilities.create_database(database_path=target_db)
    database_utilities.save_words_to_database(database_path=target_db, words_list=harvested)
def test_create_database(self):
    """create_database should report success for a fresh db file."""
    results = create_database("word.db")
    # PEP 8 (E712): never compare to True with ==; assert truthiness directly,
    # matching the sibling test that uses assertTrue.
    assert results
def test_create_database(self):
    """Database creation for word.db should succeed."""
    self.assertTrue(create_database("word.db"))
def test_create_database():
    """create_database should succeed for a db path beside this test module."""
    # anchor the db file next to this file for cross-platform path stability
    os.chdir(os.path.dirname(__file__))
    path = os.path.join(os.getcwd(), "words.db")
    # BUG FIX: the original called create_database but asserted nothing, so
    # this test could never fail; sibling tests assert a truthy return value.
    assert create_database(database_path=path)