示例#1
0
 def __init__(self):
     """
     Create the three collaborators stored on the instance: a ValidatorClass,
     a GeolocationFinder and a DatabaseHandler.

     NOTE(review): pathToPickleFiles, dbURL, dbPort, dbUser and dbPasswd are
     not defined in this method; presumably module-level settings - confirm.
     """
     self.validator = ValidatorClass(pathToPickleFiles)
     self.geo_finder = GeolocationFinder()
     self.database_handler = DatabaseHandler(dbURL, dbPort, dbUser,
                                             dbPasswd)
示例#2
0
    except IOError:
        print("Error, failed to write to file: " + path + filename)


def write_to_file(filename, res):
    """
    Write the 'text' field of every record in res to filename, one per line.

    The file is opened in 'w' mode, so any existing content is replaced.
    An IOError is reported on stdout instead of being raised.

    :param filename: path of the file to write
    :param res: iterable of mappings that each provide a 'text' key
    :return: nothing
    """
    try:
        # 'with' closes the handle even when a write or a record lookup fails
        with open(filename, 'w') as out_file:
            for item in res:
                out_file.write(item['text'] + "\n")
    except IOError:
        print("cannot open file: " + filename)


# Export the tweets labelled 'has flu' / 'no flu' to positive.txt / negative.txt.
print("Updating test data ... ")
dbh = DatabaseHandler()

# Each "updated" message is printed only after its file has been written.
res = dbh.get_tweets_with_sentiment('has flu')
write_to_file('positive.txt', res)
print("Positive file updated")

res = dbh.get_tweets_with_sentiment('no flu')
write_to_file('negative.txt', res)
print("Negative file updated")

# Word-type codes: J is adjective, r is adverb, and v is verb.
# Only adjectives are enabled here.
allowed_word_types = ["J"]

# Two independent, initially empty lists.
documents, all_words = [], []
try:
    positive_tweets = open("../classifiers/positive.txt", "r").read()
 def setUp(self):
     """
     Create a DatabaseHandler with its default arguments and load the initial
     test contents through setup_test_dbh_initial_contents().
     """
     # test_dbh sets up a db on localhost
     self.test_dbh = DatabaseHandler()
     self.setup_test_dbh_initial_contents()
示例#4
0
class DataCollector(StreamListener):
    """
    Stream listener that validates each incoming tweet, looks up its location
    when one is usable and stores the results through the database handler.
    """

    # Placeholder returned by get_data_from_json_data() when a tweet has no
    # 'text' key.
    _MISSING_TEXT = 'invalid text'
    # Texts that are never processed: the historical 'invalid' marker plus the
    # missing-text placeholder above.
    _REJECTED_TEXTS = ('invalid', _MISSING_TEXT)

    def __init__(self):
        """
        Create the validator, geolocation finder and database handler.

        NOTE(review): pathToPickleFiles, dbURL, dbPort, dbUser and dbPasswd are
        not defined in this class; presumably module-level settings - confirm.
        """
        self.validator = ValidatorClass(pathToPickleFiles)
        self.geo_finder = GeolocationFinder()
        self.database_handler = DatabaseHandler(dbURL, dbPort, dbUser,
                                                dbPasswd)

    def on_data(self, raw_data):
        """
        When Listener detects a tweet with the keywords this method is called to handle the tweet.
        Sequence:
        - Load the json data
        - Validate the tweet text
        - If the location validates and resolves to coordinates, add them to
          the record and store a map point
        - Store the record if the user language is english
        TypeError and ValueError raised along the way are logged, not raised.
        :param raw_data: JSON string to decode
        :return: nothing
        """
        try:
            # Load the raw data
            json_data = json.loads(raw_data)

            # Get some required details from json data
            user_id, text, language, location, timestamp = \
                self.get_data_from_json_data(json_data)

            # Reject the missing-text placeholder up front instead of relying
            # on the validator to turn it down.
            if text in self._REJECTED_TEXTS:
                return
            if not self.validator.validate_text_from_tweet(text):
                return

            record = {'created': timestamp, 'user_language': language}

            # Check if tweet contains a valid location
            if self.validator.validate_location(location) \
                    and location != 'None':
                # get location details of user
                address, latitude, longitude = \
                    self.geo_finder.get_location(location)

                # Only record coordinates that were actually resolved
                if latitude not in (None, 'None') \
                        and longitude not in (None, 'None'):
                    self.add_to_record(address, latitude, longitude, record)
                    self.record_map_point(latitude, longitude, timestamp,
                                          text)

            # Check if language is english
            if self.language_is_english(language):
                self.database_handler.write_english_tweet_to_database(record)
        except (TypeError, ValueError):
            logger.logging.exception('Error during on_data method')

    def language_is_english(self, language):
        """
        Checks if the language provided is english
        :param language: language code to test
        :return: True for 'en' or 'en-gb', otherwise False
        """
        return language in ('en', 'en-gb')

    def add_to_record(self, address, latitude, longitude, record):
        """
        Add location values to record, which is a dictionary modified in place
        :param address: value stored under 'address'
        :param latitude: value stored under 'latitude'
        :param longitude: value stored under 'longitude'
        :param record: dictionary
        :return: nothing
        """
        record['address'] = address
        record['latitude'] = latitude
        record['longitude'] = longitude

    def record_map_point(self, latitude, longitude, timestamp, text):
        """
        Creates a record (dictionary) for a map point and calls the database handler to store it
        :param latitude: value stored under 'lat'
        :param longitude: value stored under 'long'
        :param timestamp: string value for timestamp, stored as an int under 'date'
        :param text: value stored under 'text'
        :return: nothing
        """
        map_point_record = {
            'date': int(timestamp),
            'lat': latitude,
            'long': longitude,
            'text': text
        }
        self.database_handler.write_map_point_to_database(map_point_record)

    def get_data_from_json_data(self, json_data):
        """
        Extracts appropriate data from json data; if a KeyError occurs the
        attribute is set to a fallback value and the error is logged
        :param json_data: decoded tweet
        :return: user_id, text (lower-cased), user_language, location, timestamp(string)
        """
        try:
            user_id = json_data['user']['id_str']
        except KeyError:
            logger.logging.exception('KeyError while accessing user ID')
            user_id = 'unknown'
        try:
            user_language = json_data['user']['lang']
        except KeyError:
            logger.logging.exception('KeyError while accessing user language')
            user_language = 'unknown'
        try:
            location = json_data['user']['location']
        except KeyError:
            logger.logging.exception('KeyError while accessing user location')
            location = None
        try:
            text = json_data['text'].lower()
        except KeyError:
            # without a text the tweet gets a placeholder that on_data rejects
            text = self._MISSING_TEXT
            logger.logging.exception('KeyError while accessing tweet text')
        # Get time tweet picked up
        timestamp = self.get_timestamp()

        return user_id, text, user_language, location, timestamp

    def get_timestamp(self):
        """
        Creates a timestamp for the current local date in YYYYMMDD string format
        :return: timestamp(string)
        """
        return datetime.datetime.now().strftime('%Y%m%d')

    def on_error(self, status_code):
        """
        Logs the status code passed in
        :param status_code: status code to report
        :return: nothing
        """
        # same logging entry point as the rest of the class
        logger.logging.error('Twitter Stream returned status code:' +
                             str(status_code))
 def setUp(self):
     """
     Create a DatabaseHandler with its default arguments and load the initial
     test contents through setup_test_dbh_initial_contents().
     """
     # test_dbh sets up a db on localhost
     self.test_dbh = DatabaseHandler()
     self.setup_test_dbh_initial_contents()
class DatabaseHandlerTests(unittest.TestCase):
    """
    Tests for DatabaseHandler that read and write the database behind
    DatabaseHandler() directly; the collections are re-seeded before each test.
    """

    def setUp(self):
        # test_dbh sets up a db on localhost
        self.test_dbh = DatabaseHandler()
        self.setup_test_dbh_initial_contents()

    def setup_test_dbh_initial_contents(self):
        """Seed the map_points and english_tweets collections."""
        self.setup_map_points_collection()
        self.setup_english_tweets_collection()

    @staticmethod
    def _seed_month_digits():
        """
        Return the month digit of every seeded record, in insertion order.

        Each of the three passes contributes month 1 plus one later month,
        giving 1, 2, 1, 3, 1, 4: six records, three of them in month 01.
        """
        digits = []
        for last in range(1, 4):
            digits.extend((1, last + 1))
        return digits

    @staticmethod
    def _tweet_record(created, user_language='en', sentiment=None):
        """Build a tweet document; 'sentiment' is only added when given."""
        tweet = {
            'created': created,
            'user_language': user_language,
            'address': 'test address',
            'latitude': '000000',
            'longitude': '000000',
        }
        if sentiment is not None:
            tweet['sentiment'] = sentiment
        return tweet

    def setup_map_points_collection(self):
        """Empty map_points, then insert the six seed map points."""
        # clear previous test records  [ CAUTION: CHECK THAT DB IS NOT PRODUCTION DB ]
        map_points = self.test_dbh.db.map_points
        map_points.remove()
        # write initial test records
        for digit in self._seed_month_digits():
            map_points.insert({
                'date': int('20160' + str(digit) + '01'),
                'lat': '000000',
                'long': '000000',
                'text': "test_text" + str(digit),
            })

    def setup_english_tweets_collection(self):
        """Empty english_tweets, then insert the six seed tweets."""
        # clear previous test records  [ CAUTION: CHECK THAT DB IS NOT PRODUCTION DB ]
        english_tweets = self.test_dbh.db.english_tweets
        english_tweets.remove()
        # write initial test records
        for digit in self._seed_month_digits():
            english_tweets.insert(
                self._tweet_record('20160' + str(digit) + '01'))

    def setup_non_english_tweets_collection(self):
        """Empty non_english_tweets, then insert six seed tweets with language 'pt'."""
        # clear previous test records  [ CAUTION: CHECK THAT DB IS NOT PRODUCTION DB ]
        other_tweets = self.test_dbh.db.non_english_tweets
        other_tweets.remove()
        # write initial test records
        for digit in self._seed_month_digits():
            other_tweets.insert(
                self._tweet_record('20160' + str(digit) + '01', 'pt'))

    def test_write_english_tweet_to_database_writes_record_to_test_db_english_tweet_table(self):
        # Arrange
        seeded_count = 6
        tweet = self._tweet_record(int('20160102'))

        # Execute
        self.test_dbh.write_english_tweet_to_database(tweet)

        # Check
        english_tweets = self.test_dbh.db.english_tweets
        self.assertEqual(seeded_count + 1, english_tweets.find().count())
        self.assertEqual(tweet, english_tweets.find_one({'created': 20160102}))

    def test_write_map_point_writes_record_to_test_db(self):
        # Arrange
        seeded_count = 6
        point = {
            'date': '20160102',
            'lat': '000000',
            'long': '000000',
            'text': "test_text entered by write_method",
        }

        # Execute
        self.test_dbh.write_map_point_to_database(point)

        # Check
        map_points = self.test_dbh.db.map_points
        self.assertEqual(seeded_count + 1, map_points.find().count())
        self.assertEqual(point, map_points.find_one({'date': '20160102'}))

    def test_get_map_points_for_five_dates_returns_points_for_five_days(self):
        # Execute
        found = self.test_dbh.get_map_points_for_five_dates('20160105', '20160101')

        # Check: three seed points are expected back
        self.assertEqual(3, found.count())

    def test_get_map_point_data_returns_points_within_defined_area(self):
        # Arrange: one extra point, the only one expected back
        self.test_dbh.db.map_points.insert({
            'date': int('20160601'),
            'lat': 53.3478,
            'long': 6.2597,
            'text': "test_text",
        })

        # Execute
        found = self.test_dbh.get_map_point_data('54', '52', '7', '5', '20160605', '20160531')

        # Check
        self.assertEqual(1, found.count())

    def test_get_uncategorised_tweet_from_english_collection_returns_uncategorised_tweet(self):
        # Arrange
        self.test_dbh.db.english_tweets.insert(
            self._tweet_record('20160101', sentiment='unknown'))

        # Execute
        fetched = self.test_dbh.get_uncategorised_tweet_from_english_collection()

        # Check
        self.assertEqual('unknown', fetched['sentiment'])

    def test_update_document_sentiment_in_english_collection_updates_document(self):
        # Arrange
        english_tweets = self.test_dbh.db.english_tweets
        # insert returns the id of the newly created record
        record_id = english_tweets.insert(
            self._tweet_record('20160101', sentiment='unknown'))

        # Execute
        modified_count = self.test_dbh.update_document_sentiment_in_english_collection(
            record_id, 'hasFlu', 'sample_text')
        # retrieve record to check sentiment
        stored = self.test_dbh.db.english_tweets.find_one({"_id": ObjectId(record_id)})

        # Check
        self.assertEqual(1, modified_count)
        self.assertEqual('hasFlu', stored['sentiment'])

    def test_get_tweets_with_sentiment_returns_appropriate_records(self):
        # Arrange
        self.test_dbh.db.english_tweets.insert(
            self._tweet_record('20160101', sentiment='unknown'))

        # Execute
        found = self.test_dbh.get_tweets_with_sentiment('unknown')

        # Check
        self.assertEqual(1, found.count())

    def test_get_total_count_returns_total_count(self):
        # Check: all six seed tweets are counted
        self.assertEqual(6, self.test_dbh.get_total_count())

    def test_get_today_count_return_todays_count(self):
        # Check: three seed tweets are dated 20160101
        self.assertEqual(3, self.test_dbh.get_today_count('20160101'))

    def test_get_yearly_count_returns_year_count(self):
        # Check: all six seed tweets are dated 2016
        self.assertEqual(6, self.test_dbh.get_yearly_count('2016'))

    def test_get_month_count_returns_month_count(self):
        # Check: three seed tweets are dated 201601
        self.assertEqual(3, self.test_dbh.get_month_count('201601'))

    def test_get_count_for_time_period_returns_correct_count_for_time_period(self):
        counted = self.test_dbh.get_count_for_time_period('20160101', '20160101')
        self.assertEqual(3, counted)

    def test_get_instance_count_for_each_week_of_this_year_returns_dict_containing_correct_counts(self):
        # Arrange
        week_ranges = {'week0': {'start_date': '20151228', 'end_date': '20160103'}}
        expected = collections.OrderedDict([('0', 3)])

        # Execute
        weekly_counts = self.test_dbh.get_instance_count_for_each_week_of_this_year(week_ranges)

        # Check
        self.assertEqual(expected, weekly_counts)
class DatabaseHandlerTests(unittest.TestCase):
    """
    Tests for DatabaseHandler that read and write the database behind
    DatabaseHandler() directly; the collections are re-seeded before each test.
    """

    def setUp(self):
        # test_dbh sets up a db on localhost
        self.test_dbh = DatabaseHandler()
        self.setup_test_dbh_initial_contents()

    def setup_test_dbh_initial_contents(self):
        """Seed the map_points and english_tweets collections."""
        self.setup_map_points_collection()
        self.setup_english_tweets_collection()

    @staticmethod
    def _seed_month_digits():
        """
        Return the month digit of every seeded record, in insertion order.

        Each of the three passes contributes month 1 plus one later month,
        giving 1, 2, 1, 3, 1, 4: six records, three of them in month 01.
        """
        digits = []
        for last in range(1, 4):
            digits.extend((1, last + 1))
        return digits

    @staticmethod
    def _tweet_record(created, user_language='en', sentiment=None):
        """Build a tweet document; 'sentiment' is only added when given."""
        tweet = {
            'created': created,
            'user_language': user_language,
            'address': 'test address',
            'latitude': '000000',
            'longitude': '000000',
        }
        if sentiment is not None:
            tweet['sentiment'] = sentiment
        return tweet

    def setup_map_points_collection(self):
        """Empty map_points, then insert the six seed map points."""
        # clear previous test records  [ CAUTION: CHECK THAT DB IS NOT PRODUCTION DB ]
        map_points = self.test_dbh.db.map_points
        map_points.remove()
        # write initial test records
        for digit in self._seed_month_digits():
            map_points.insert({
                'date': int('20160' + str(digit) + '01'),
                'lat': '000000',
                'long': '000000',
                'text': "test_text" + str(digit),
            })

    def setup_english_tweets_collection(self):
        """Empty english_tweets, then insert the six seed tweets."""
        # clear previous test records  [ CAUTION: CHECK THAT DB IS NOT PRODUCTION DB ]
        english_tweets = self.test_dbh.db.english_tweets
        english_tweets.remove()
        # write initial test records
        for digit in self._seed_month_digits():
            english_tweets.insert(
                self._tweet_record('20160' + str(digit) + '01'))

    def setup_non_english_tweets_collection(self):
        """Empty non_english_tweets, then insert six seed tweets with language 'pt'."""
        # clear previous test records  [ CAUTION: CHECK THAT DB IS NOT PRODUCTION DB ]
        other_tweets = self.test_dbh.db.non_english_tweets
        other_tweets.remove()
        # write initial test records
        for digit in self._seed_month_digits():
            other_tweets.insert(
                self._tweet_record('20160' + str(digit) + '01', 'pt'))

    def test_write_english_tweet_to_database_writes_record_to_test_db_english_tweet_table(
            self):
        # Arrange
        seeded_count = 6
        tweet = self._tweet_record(int('20160102'))

        # Execute
        self.test_dbh.write_english_tweet_to_database(tweet)

        # Check
        english_tweets = self.test_dbh.db.english_tweets
        self.assertEqual(seeded_count + 1, english_tweets.find().count())
        self.assertEqual(tweet,
                         english_tweets.find_one({'created': 20160102}))

    def test_write_map_point_writes_record_to_test_db(self):
        # Arrange
        seeded_count = 6
        point = {
            'date': '20160102',
            'lat': '000000',
            'long': '000000',
            'text': "test_text entered by write_method",
        }

        # Execute
        self.test_dbh.write_map_point_to_database(point)

        # Check
        map_points = self.test_dbh.db.map_points
        self.assertEqual(seeded_count + 1, map_points.find().count())
        self.assertEqual(point, map_points.find_one({'date': '20160102'}))

    def test_get_map_points_for_five_dates_returns_points_for_five_days(self):
        # Execute
        found = self.test_dbh.get_map_points_for_five_dates(
            '20160105', '20160101')

        # Check: three seed points are expected back
        self.assertEqual(3, found.count())

    def test_get_map_point_data_returns_points_within_defined_area(self):
        # Arrange: one extra point, the only one expected back
        self.test_dbh.db.map_points.insert({
            'date': int('20160601'),
            'lat': 53.3478,
            'long': 6.2597,
            'text': "test_text",
        })

        # Execute
        found = self.test_dbh.get_map_point_data('54', '52', '7', '5',
                                                 '20160605', '20160531')

        # Check
        self.assertEqual(1, found.count())

    def test_get_uncategorised_tweet_from_english_collection_returns_uncategorised_tweet(
            self):
        # Arrange
        self.test_dbh.db.english_tweets.insert(
            self._tweet_record('20160101', sentiment='unknown'))

        # Execute
        fetched = self.test_dbh.get_uncategorised_tweet_from_english_collection()

        # Check
        self.assertEqual('unknown', fetched['sentiment'])

    def test_update_document_sentiment_in_english_collection_updates_document(
            self):
        # Arrange
        english_tweets = self.test_dbh.db.english_tweets
        # insert returns the id of the newly created record
        record_id = english_tweets.insert(
            self._tweet_record('20160101', sentiment='unknown'))

        # Execute
        modified_count = self.test_dbh.update_document_sentiment_in_english_collection(
            record_id, 'hasFlu', 'sample_text')
        # retrieve record to check sentiment
        stored = self.test_dbh.db.english_tweets.find_one(
            {"_id": ObjectId(record_id)})

        # Check
        self.assertEqual(1, modified_count)
        self.assertEqual('hasFlu', stored['sentiment'])

    def test_get_tweets_with_sentiment_returns_appropriate_records(self):
        # Arrange
        self.test_dbh.db.english_tweets.insert(
            self._tweet_record('20160101', sentiment='unknown'))

        # Execute
        found = self.test_dbh.get_tweets_with_sentiment('unknown')

        # Check
        self.assertEqual(1, found.count())

    def test_get_total_count_returns_total_count(self):
        # Check: all six seed tweets are counted
        self.assertEqual(6, self.test_dbh.get_total_count())

    def test_get_today_count_return_todays_count(self):
        # Check: three seed tweets are dated 20160101
        self.assertEqual(3, self.test_dbh.get_today_count('20160101'))

    def test_get_yearly_count_returns_year_count(self):
        # Check: all six seed tweets are dated 2016
        self.assertEqual(6, self.test_dbh.get_yearly_count('2016'))

    def test_get_month_count_returns_month_count(self):
        # Check: three seed tweets are dated 201601
        self.assertEqual(3, self.test_dbh.get_month_count('201601'))

    def test_get_count_for_time_period_returns_correct_count_for_time_period(
            self):
        counted = self.test_dbh.get_count_for_time_period(
            '20160101', '20160101')
        self.assertEqual(3, counted)

    def test_get_instance_count_for_each_week_of_this_year_returns_dict_containing_correct_counts(
            self):
        # Arrange
        week_ranges = {
            'week0': {
                'start_date': '20151228',
                'end_date': '20160103',
            },
        }
        expected = collections.OrderedDict([('0', 3)])

        # Execute
        weekly_counts = self.test_dbh.get_instance_count_for_each_week_of_this_year(
            week_ranges)

        # Check
        self.assertEqual(expected, weekly_counts)
示例#8
0
 def __init__(self):
     """
     Create the three collaborators stored on the instance: a ValidatorClass,
     a GeolocationFinder and a DatabaseHandler.

     NOTE(review): pathToPickleFiles, dbURL, dbPort, dbUser and dbPasswd are
     not defined in this method; presumably module-level settings - confirm.
     """
     self.validator = ValidatorClass(pathToPickleFiles)
     self.geo_finder = GeolocationFinder()
     self.database_handler = DatabaseHandler(dbURL, dbPort,dbUser, dbPasswd)
示例#9
0
class DataCollector(StreamListener):
    """
    Stream listener that validates each incoming tweet, looks up its location
    when one is usable and stores the results through the database handler.
    """

    # Placeholder returned by get_data_from_json_data() when a tweet has no
    # 'text' key.
    _MISSING_TEXT = 'invalid text'
    # Texts that are never processed: the historical 'invalid' marker plus the
    # missing-text placeholder above.
    _REJECTED_TEXTS = ('invalid', _MISSING_TEXT)

    def __init__(self):
        """
        Create the validator, geolocation finder and database handler.

        NOTE(review): pathToPickleFiles, dbURL, dbPort, dbUser and dbPasswd are
        not defined in this class; presumably module-level settings - confirm.
        """
        self.validator = ValidatorClass(pathToPickleFiles)
        self.geo_finder = GeolocationFinder()
        self.database_handler = DatabaseHandler(dbURL, dbPort, dbUser, dbPasswd)

    def on_data(self, raw_data):
        """
        When Listener detects a tweet with the keywords this method is called to handle the tweet.
        Sequence:
        - Load the json data
        - Validate the tweet text
        - If the location validates and resolves to coordinates, add them to
          the record and store a map point
        - Store the record if the user language is english
        TypeError and ValueError raised along the way are logged, not raised.
        :param raw_data: JSON string to decode
        :return: nothing
        """
        try:
            # Load the raw data
            json_data = json.loads(raw_data)

            # Get some required details from json data
            user_id, text, language, location, timestamp = self.get_data_from_json_data(json_data)

            # Reject the missing-text placeholder up front instead of relying
            # on the validator to turn it down.
            if text in self._REJECTED_TEXTS:
                return
            if not self.validator.validate_text_from_tweet(text):
                return

            record = {'created': timestamp, 'user_language': language}

            # Check if tweet contains a valid location
            if self.validator.validate_location(location) and location != 'None':
                # get location details of user
                address, latitude, longitude = self.geo_finder.get_location(location)

                # Only record coordinates that were actually resolved
                if latitude not in (None, 'None') and longitude not in (None, 'None'):
                    self.add_to_record(address, latitude, longitude, record)
                    self.record_map_point(latitude, longitude, timestamp, text)

            # Check if language is english
            if self.language_is_english(language):
                self.database_handler.write_english_tweet_to_database(record)
        except (TypeError, ValueError):
            logger.logging.exception('Error during on_data method')

    def language_is_english(self, language):
        """
        Checks if the language provided is english
        :param language: language code to test
        :return: True for 'en' or 'en-gb', otherwise False
        """
        return language in ('en', 'en-gb')

    def add_to_record(self, address, latitude, longitude, record):
        """
        Add location values to record, which is a dictionary modified in place
        :param address: value stored under 'address'
        :param latitude: value stored under 'latitude'
        :param longitude: value stored under 'longitude'
        :param record: dictionary
        :return: nothing
        """
        record['address'] = address
        record['latitude'] = latitude
        record['longitude'] = longitude

    def record_map_point(self, latitude, longitude, timestamp, text):
        """
        Creates a record (dictionary) for a map point and calls the database handler to store it
        :param latitude: value stored under 'lat'
        :param longitude: value stored under 'long'
        :param timestamp: string value for timestamp, stored as an int under 'date'
        :param text: value stored under 'text'
        :return: nothing
        """
        map_point_record = {'date': int(timestamp), 'lat': latitude, 'long': longitude, 'text': text}
        self.database_handler.write_map_point_to_database(map_point_record)

    def get_data_from_json_data(self, json_data):
        """
        Extracts appropriate data from json data; if a KeyError occurs the
        attribute is set to a fallback value and the error is logged
        :param json_data: decoded tweet
        :return: user_id, text (lower-cased), user_language, location, timestamp(string)
        """
        try:
            user_id = json_data['user']['id_str']
        except KeyError:
            logger.logging.exception('KeyError while accessing user ID')
            user_id = 'unknown'
        try:
            user_language = json_data['user']['lang']
        except KeyError:
            logger.logging.exception('KeyError while accessing user language')
            user_language = 'unknown'
        try:
            location = json_data['user']['location']
        except KeyError:
            logger.logging.exception('KeyError while accessing user location')
            location = None
        try:
            text = json_data['text'].lower()
        except KeyError:
            # without a text the tweet gets a placeholder that on_data rejects
            text = self._MISSING_TEXT
            logger.logging.exception('KeyError while accessing tweet text')
        # Get time tweet picked up
        timestamp = self.get_timestamp()

        return user_id, text, user_language, location, timestamp

    def get_timestamp(self):
        """
        Creates a timestamp for the current local date in YYYYMMDD string format
        :return: timestamp(string)
        """
        return datetime.datetime.now().strftime('%Y%m%d')

    def on_error(self, status_code):
        """
        Logs the status code passed in
        :param status_code: status code to report
        :return: nothing
        """
        # same logging entry point as the rest of the class
        logger.logging.error('Twitter Stream returned status code:' + str(status_code))
示例#10
0
#   Author: David Dunne,    Student Number: C00173649,      Created Nov 2015

import os
from datetime import date, datetime, timedelta

from flask import Flask, render_template, request, jsonify, make_response

from utilities import email_sender
from utilities.database_handler import DatabaseHandler


app = Flask(__name__)

# Connection settings can be overridden through the environment so that the
# credentials need not live in source control. The previous hard-coded values
# stay as defaults, so behaviour is unchanged when the variables are unset.
# NOTE(review): rotate these credentials and drop the defaults once every
# deployment sets the variables.
database_handler = DatabaseHandler(
    os.environ.get('FLUTRACK_DB_HOST', 'ds061335.mongolab.com'),
    int(os.environ.get('FLUTRACK_DB_PORT', 61335)),
    os.environ.get('FLUTRACK_DB_USER', 'flutrackapp'),
    os.environ.get('FLUTRACK_DB_PASSWORD', 'flutrackapp'),
)


@app.route('/', methods=['GET'])
def default_page():
    """
    Serve the flu-TrakR web interface.
    :return: the rendered home.html template
    """
    home_page = render_template('home.html')
    return home_page


@app.route('/categorise', methods=['GET'])
def categorise():
    """
    Serve the web form used to label the sentiment of tweets when building a training set.
    :return: the rendered dataCategorisor.html template
    """
    labelling_form = render_template('dataCategorisor.html')
    return labelling_form

        pickle.dump(object, file)
        file.close()
    except IOError:
        print("Error, failed to write to file: " + path + filename)


def write_to_file(filename, res):
    """
    Write the 'text' field of every record in res to filename, one per line.

    The file is opened in 'w' mode, so any existing content is replaced.
    An IOError is reported on stdout instead of being raised.

    :param filename: path of the file to write
    :param res: iterable of mappings that each provide a 'text' key
    :return: nothing
    """
    try:
        # 'with' closes the handle even when a write or a record lookup fails
        with open(filename, 'w') as out_file:
            for item in res:
                out_file.write(item['text'] + "\n")
    except IOError:
        print("cannot open file: " + filename)
# Export the tweets labelled 'has flu' / 'no flu' to positive.txt / negative.txt.
print("Updating test data ... ")
dbh = DatabaseHandler()

# Each "updated" message is printed only after its file has been written.
res = dbh.get_tweets_with_sentiment('has flu')
write_to_file('positive.txt', res)
print("Positive file updated")

res = dbh.get_tweets_with_sentiment('no flu')
write_to_file('negative.txt', res)
print("Negative file updated")



# Word-type codes: J is adjective, r is adverb, and v is verb.
# Only adjectives are enabled here.
allowed_word_types = ["J"]


# Starts out as an empty list.
documents = list()