예제 #1
0
def main(database: str, url_list_file: str):
    """Scrape words from every listed URL, store them, then print a document's text.

    Args:
        database: Database name. It is only echoed to the console here; the
            words are always written to ``words.db`` next to this file.
        url_list_file: Path handed to ``url_utilities.load_urls_from_file``.

    Side effects:
        Changes the process working directory to the directory containing
        this file, passes the ``words.db`` path in that directory to
        ``database_utilities``, and prints whatever ``read_word.getText``
        returns for ``docfile.docx`` (a relative name, so it is resolved
        against that directory).
    """
    big_word_list = []
    print("we are going to work with " + database)
    print("we are going to scan " + url_list_file)
    urls = url_utilities.load_urls_from_file(url_list_file)

    for url in urls:
        print("reading " + url)
        page_content = url_utilities.load_page(url=url)
        words = url_utilities.scrape_page(page_contents=page_content)
        big_word_list.extend(words)

    print(len(big_word_list))

    # database code
    # Resolve __file__ to an absolute path first: when it carries no
    # directory part, dirname() returns "" and os.chdir("") raises
    # FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    # make sure the db file is in the same directory of the .py file
    path = os.path.join(os.getcwd(), "words.db")
    database_utilities.create_database(database_path=path)
    database_utilities.save_words_to_database(database_path=path,
                                              word_list=big_word_list)

    filename = 'docfile.docx'
    text_data = read_word.getText(filename)
    print(text_data)
예제 #2
0
def main(database, url_list_file):
    """Gather the words of every listed page and hand them to the database layer.

    Args:
        database: Value echoed to the console and passed to
            ``database_utilities.create_database``.
        url_list_file: Path handed to ``url_utilities.load_urls_from_file``.

    Only the first 250000 collected words are passed to
    ``database_utilities.save_words_to_database``.
    """
    print('Db: ' + database)
    print('input list: ' + url_list_file)

    collected = []
    for address in url_utilities.load_urls_from_file(url_list_file):
        print('reading: ', address)
        html = url_utilities.load_page(url=address)
        collected += url_utilities.scrape_page(page_contents=html)

    database_utilities.create_database(database)
    print('length words is: ', len(collected))
    database_utilities.save_words_to_database(collected[:250000])
예제 #3
0
def main(database: str, url_list_file: str):
    """Scrape words from every listed URL and save them to ``words.db``.

    Args:
        database: Database name. It is only echoed to the console here; the
            words are always written to ``words.db`` next to this file.
        url_list_file: Path handed to ``url_utilities.load_urls_from_file``.

    Side effects:
        Changes the process working directory to the directory containing
        this file and passes the ``words.db`` path in that directory to
        ``database_utilities``.
    """
    big_word_list = []
    print("we are going to work with " + database)
    print("we are going to scan " + url_list_file)
    urls = url_utilities.load_urls_from_file(url_list_file)
    for url in urls:
        print("reading " + url)
        page_content = url_utilities.load_page(url=url)
        words = url_utilities.scrape_page(page_content)
        big_word_list.extend(words)
    # Resolve __file__ to an absolute path first: when it carries no
    # directory part, dirname() returns "" and os.chdir("") raises
    # FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    path = os.path.join(os.getcwd(), "words.db")
    database_utilities.create_database(database_path=path)
    database_utilities.save_words_to_database(database_path=path,
                                              words=big_word_list)
예제 #4
0
def main(database: str, url_list_file: str):
    """Scrape words from every listed URL and save them to ``words.db``.

    Args:
        database: Database name. It is only echoed to the console here; the
            words are always written to ``words.db`` next to this file.
        url_list_file: Path handed to ``url_utilities.load_urls_from_files``.

    Side effects:
        Changes the process working directory to the directory containing
        this file and passes the ``words.db`` path in that directory to
        ``database_utilities``.
    """
    big_word_list = []
    print('We are going to work with ' + database)
    print('We will scan ' + url_list_file)
    urls = url_utilities.load_urls_from_files(url_list_file)
    for url in urls:
        print('Reading ' + url)
        page_content = url_utilities.load_page(url=url)
        words = url_utilities.scrape_page(page_contents=page_content)
        big_word_list.extend(words)

    # database code
    # Resolve __file__ to an absolute path first: when it carries no
    # directory part, dirname() returns "" and os.chdir("") raises
    # FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    path = os.path.join(os.getcwd(), 'words.db')
    database_utilities.create_database(database_path=path)
    database_utilities.save_words_to_database(database_path=path,
                                              words_list=big_word_list)
예제 #5
0
def main(database: str, url_list_file: str):
    """Scrape words from every listed URL and save them to ``words.db``.

    Args:
        database: Database name. It is only echoed to the console here; the
            words are always written to ``words.db`` next to this file.
        url_list_file: Path handed to ``url_utilities.load_urls_from_file``.

    Side effects:
        Changes the process working directory to the directory containing
        this file and passes the ``words.db`` path in that directory to
        ``database_utilities``.
    """
    big_word_list = []
    print('we are going to work with ' + database)
    print('we are going to scan ' + url_list_file)
    urls = url_utilities.load_urls_from_file(url_list_file)
    for url in urls:
        print("reading " + url)
        page_content = url_utilities.load_page(url=url)
        words = url_utilities.scrape_page(page_contents=page_content)
        big_word_list.extend(words)

    # database code - mitigating cross-platform file path issues
    # dunder __file__ gives the location of the file we're currently running.
    # Resolve it to an absolute path first: when it carries no directory
    # part, dirname() returns "" and os.chdir("") raises FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    path = os.path.join(os.getcwd(), "words.db")
    database_utilities.create_database(database_path=path)
    database_utilities.save_words_to_database(database_path=path,
                                              words_list=big_word_list)
예제 #6
0
def main():
    """Store the tweets of every handle listed in ``handles.txt`` with a sentiment score.

    Creates the database, reads one twitter handle per line from
    ``handles.txt`` (relative to the current working directory), fetches the
    tweets of each handle through ``got_tweet_methods.return_tweets`` and
    saves one row per tweet: handle, full text, creation time and the first
    component of TextBlob's sentiment for the text.

    If ``handles.txt`` cannot be opened or read, a message is printed and the
    function returns without processing any handle.
    """
    database_utilities.create_database()  # Creates the database

    # Read the twitter handles to query. Opening the file is inside the
    # guarded section so a missing file actually reaches the handler, and
    # the context manager guarantees the file is closed again.
    try:
        with open('handles.txt', 'r') as twitter_handles_file:
            lines = twitter_handles_file.readlines()
    except OSError:
        print("handles.txt not found in " + os.path.join(os.path.dirname(__file__), '..'))
        return  # without the handle list there is nothing to process

    # Saves Tweets to database, retrieving tweets (via API) from handle 1 at a time
    for line in lines:
        # readlines() keeps the line terminator; strip it so it is neither
        # sent to the API nor stored/printed as part of the handle.
        handle = line.strip()
        if not handle:
            continue  # ignore blank lines
        tweets = got_tweet_methods.return_tweets(handle)  # return list of tweets from handle
        for tweet in tweets:
            b = TextBlob(tweet.full_text)  # return TextBlob semantic info for individual tweet string
            tweet_list = [handle, tweet.full_text, tweet.created_at, b.sentiment[0]]  # save attributes to list
            database_utilities.save_tweets_to_database(tweet_list)  # insert list into database as row
        print(handle + " records saved to database")
예제 #7
0
def main(database: str, url_list_file: str):
    """Scrape words from every listed URL and save them to ``words.db``.

    Args:
        database: Database name. It is only echoed to the console here; the
            words are always written to ``words.db`` next to this file.
        url_list_file: Path handed to ``url_utilities.load_urls_from_file``.

    Side effects:
        Changes the process working directory to the directory containing
        this file and passes the ``words.db`` path in that directory to
        ``database_utilities``.
    """
    big_word_list = []
    print("We are going to work with " + database)
    print("We are going to scan " + url_list_file)
    urls = url_utilities.load_urls_from_file(url_list_file)
    for url in urls:
        print("reading " + url)
        page_content = url_utilities.load_page(url=url)
        words = url_utilities.scrape_page(
            page_contents=page_content)  # this gives a raw list of words
        big_word_list.extend(words)

    # database code
    # This runs cross-platform, so the path is built with os.path.
    # Resolve __file__ to an absolute path first: when it carries no
    # directory part, dirname() returns "" and os.chdir("") raises
    # FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    path = os.path.join(os.getcwd(), "words.db")
    database_utilities.create_database(database_path=path)
    database_utilities.save_words_to_database(database_path=path,
                                              words_list=big_word_list)
 def test_create_database(self):
     """Check that create_database("word.db") returns a value equal to True."""
     created = create_database("word.db")
     assert created == True
예제 #9
0
 def test_create_database(self):
     """Check that create_database("word.db") returns a truthy result."""
     self.assertTrue(create_database("word.db"))
예제 #10
0
def test_create_database():
    """Call create_database for ``words.db`` located next to this test file.

    The working directory is switched to this file's directory first; the
    call's result is not checked, so the test only fails if an exception
    is raised.
    """
    here = os.path.dirname(__file__)
    os.chdir(here)
    create_database(database_path=os.path.join(os.getcwd(), "words.db"))