def setUp(self):
    """Build a fresh GeolocationFinder for the test, then attach the mocks."""
    finder_under_test = GeolocationFinder()
    self.test_geolocation_finder = finder_under_test
    # setup_mocks reads self.test_geolocation_finder, so it must run last.
    self.setup_mocks()
def __init__(self):
    """
    Create the collaborators this object works with: a validator built from
    pathToPickleFiles, a geolocation finder and a database handler built
    from the module-level connection settings.
    """
    self.validator = ValidatorClass(pathToPickleFiles)
    self.geo_finder = GeolocationFinder()

    connection_settings = (dbURL, dbPort, dbUser, dbPasswd)
    self.database_handler = DatabaseHandler(*connection_settings)
class GeolocationFinderTests(unittest.TestCase):
    """
    Tests for GeolocationFinder: cache lookup, fallback to the geolocator and
    extraction of address / latitude / longitude from a geolocation.
    """

    def setUp(self):
        """Create the finder under test with an empty cache and mock fixtures."""
        self.test_geolocation_finder = GeolocationFinder()
        # Every test starts from its own empty cache, so an entry stored by
        # one test (e.g. 'Dublin') cannot leak into a test that expects a
        # cache miss and make the result depend on execution order.
        self.test_geolocation_finder.location_cache = {}
        self.setup_mocks()

    def setup_mocks(self):
        """Create the mock geolocation fixtures shared by the tests."""
        self.attrs = {
            'address': 'test_address',
            'latitude': '0000',
            'longitude': '0000',
        }
        self.test_geolocation = Mock(**self.attrs)
        # A separate Mock with the same attributes, so tests can tell whether
        # the finder's geolocation was replaced by self.test_geolocation.
        self.test_geolocation_finder.geolocation = Mock(**self.attrs)

    def test_GetLocation_ReturnsNoneNoneNone_IfLocationIsNone(self):
        # Checks
        self.assertEqual((None, None, None),
                         self.test_geolocation_finder.get_location(None))

    def test_get_location_sets_geolocation_from_cache_When_geolocation_exists_in_cache(self):
        # Arrange
        self.test_geolocation_finder.location_cache['Dublin'] = \
            self.test_geolocation

        # Execute
        with patch.object(GeolocationFinder,
                          'set_geolocation_from_cache') as mock_method:
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        mock_method.assert_called_once_with('Dublin')

    def test_get_location_sets_geolocation_from_geolocator_when_not_present_in_cache(self):
        # Arrange: the cache is empty (reset in setUp), so 'Dublin' is a miss.

        # Execute
        with patch.object(GeolocationFinder,
                          'set_geolocation_from_geolocator',
                          return_value=self.test_geolocation) as mock_method:
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        mock_method.assert_called_once_with('Dublin')

    def test_get_location_sets_geolocation_from_geolocator_adds_geolocation_to_cache(self):
        # Arrange
        self.test_geolocation_finder.location_cache = {}

        # Execute
        with patch.object(Nominatim, 'geocode',
                          return_value=self.test_geolocation):
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        self.assertIn('Dublin', self.test_geolocation_finder.location_cache)

    def test_set_geolocation_from_cache_sets_the_geolocation(self):
        # Arrange
        self.test_geolocation_finder.location_cache['Dublin'] = \
            self.test_geolocation

        # Execute
        self.test_geolocation_finder.set_geolocation_from_cache('Dublin')

        # Checks
        self.assertEqual(self.test_geolocation_finder.geolocation,
                         self.test_geolocation)

    def test_set_geolocation_from_geolocator_sets_geolocation(self):
        # Execute
        with patch.object(Nominatim, 'geocode',
                          return_value=self.test_geolocation):
            self.test_geolocation_finder.set_geolocation_from_geolocator(
                'Dublin')

        # Checks
        self.assertEqual(self.test_geolocation_finder.geolocation,
                         self.test_geolocation)

    def test_set_geolocation_from_geolocator_sets_geolocation_to_None_when_GeocoderTimedOut_exception_raised(self):
        # Execute
        with patch.object(Nominatim, 'geocode', side_effect=GeocoderTimedOut):
            self.test_geolocation_finder.set_geolocation_from_geolocator(
                'Dublin')

        # Checks
        self.assertIsNone(self.test_geolocation_finder.geolocation)

    def test_get_addr_lat_long_raises_AttributeError_and_returns_None_None_None_when_no_geolocation_passed(self):
        # Arrange
        self.test_geolocation_finder.geolocation = None

        # Execute
        address, latitude, longitude = \
            self.test_geolocation_finder.get_addr_lat_long()

        # Checks
        self.assertIsNone(address)
        self.assertIsNone(latitude)
        self.assertIsNone(longitude)

    def test_get_addr_lat_long(self):
        # Execute
        address, latitude, longitude = \
            self.test_geolocation_finder.get_addr_lat_long()

        # Checks
        self.assertEqual(address, 'test_address')
        self.assertEqual(latitude, '0000')
        self.assertEqual(longitude, '0000')
class DataCollector(StreamListener):
    """
    Stream listener that validates incoming tweets, resolves the user's
    location to coordinates and writes the resulting records through the
    database handler.
    """

    # Placeholder returned by get_data_from_json_data when the payload has no
    # usable 'text' field.
    _MISSING_TEXT = 'invalid text'
    # Texts on_data rejects without consulting the validator. 'invalid' is
    # the literal the guard has always compared against and is kept so that
    # existing behaviour does not change.
    _REJECTED_TEXTS = ('invalid', _MISSING_TEXT)

    def __init__(self):
        # Let the base listener initialise its own state before adding ours.
        super(DataCollector, self).__init__()
        self.validator = ValidatorClass(pathToPickleFiles)
        self.geo_finder = GeolocationFinder()
        self.database_handler = DatabaseHandler(dbURL, dbPort, dbUser,
                                                dbPasswd)

    def on_data(self, raw_data):
        """
        When Listener detects a tweet with the keywords this method is called
        to handle the tweet.
        Sequence:
        - Load the json data
        - Validate tweet
        - Store if valid

        TypeError and ValueError raised while processing are logged and
        swallowed.

        :param raw_data: raw JSON string for one stream message
        :return: nothing
        """
        try:
            # Load the raw data
            json_data = json.loads(raw_data)

            # Get some required details from json data
            _user_id, text, language, location, timestamp = \
                self.get_data_from_json_data(json_data)

            # Check if text in tweet is valid before processing
            if text in self._REJECTED_TEXTS:
                return
            if not self.validator.validate_text_from_tweet(text):
                return

            record = {'created': timestamp, 'user_language': language}

            # Check if tweet contains a valid location
            if self.validator.validate_location(location) \
                    and location != 'None':
                # get location details of user
                address, latitude, longitude = \
                    self.geo_finder.get_location(location)

                # If location has not returned None for lat and long,
                # construct and record the map point in database
                if self._has_coordinates(latitude, longitude):
                    self.add_to_record(address, latitude, longitude, record)
                    self.record_map_point(latitude, longitude, timestamp,
                                          text)

            # Check if language is english
            # NOTE(review): English tweets are stored whether or not a
            # location was resolved - confirm this nesting is intended.
            if self.language_is_english(language):
                self.database_handler.write_english_tweet_to_database(record)
        except (TypeError, ValueError):
            logger.logging.exception('Error during on_data method')

    @staticmethod
    def _has_coordinates(latitude, longitude):
        """Return True when neither value is None nor the string 'None'."""
        return all(value is not None and value != 'None'
                   for value in (latitude, longitude))

    def language_is_english(self, language):
        """
        Checks if the language provided is english
        :param language: language code string
        :return: boolean value True/False
        """
        return language in ('en', 'en-gb')

    def add_to_record(self, address, latitude, longitude, record):
        """
        Add location values to record, which is a dictionary (mutated in
        place)
        :param address: value stored under 'address'
        :param latitude: value stored under 'latitude'
        :param longitude: value stored under 'longitude'
        :param record: dictionary
        :return: nothing
        """
        record.update(address=address, latitude=latitude,
                      longitude=longitude)

    def record_map_point(self, latitude, longitude, timestamp, text):
        """
        Creates a record (dictionary) for a map point and calls the database
        handler to store it
        :param latitude: value stored under 'lat'
        :param longitude: value stored under 'long'
        :param timestamp: string value for timestamp, converted with int()
        :param text: string value for text
        :return: nothing
        :raises ValueError: if timestamp cannot be converted to an integer
        """
        map_point_record = {
            'date': int(timestamp),
            'lat': latitude,
            'long': longitude,
            'text': text,
        }
        self.database_handler.write_map_point_to_database(map_point_record)

    def _read_user_field(self, json_data, field, default, log_message):
        """Return json_data['user'][field]; on KeyError log and return default."""
        try:
            return json_data['user'][field]
        except KeyError:
            logger.logging.exception(log_message)
            return default

    def get_data_from_json_data(self, json_data):
        """
        Extracts appropriate data from json data; if a KeyError occurs the
        attribute is set to 'unknown' or None. A missing or non-string text
        is replaced by a placeholder that on_data rejects.
        :param json_data: decoded JSON message
        :return: user_id(string), text(string), user_language(string),
                 location(string or None), timestamp(string)
        """
        user_id = self._read_user_field(
            json_data, 'id_str', 'unknown',
            'KeyError while accessing user ID')
        user_language = self._read_user_field(
            json_data, 'lang', 'unknown',
            'KeyError while accessing user language')
        location = self._read_user_field(
            json_data, 'location', None,
            'KeyError while accessing user location')

        try:
            text = json_data['text'].lower()
        except (KeyError, AttributeError):
            # AttributeError covers a 'text' value without .lower() (e.g.
            # None). Use the placeholder so the tweet will not be accepted.
            text = self._MISSING_TEXT
            logger.logging.exception('KeyError while accessing tweet text')

        # Get time tweet picked up
        timestamp = self.get_timestamp()

        return user_id, text, user_language, location, timestamp

    def get_timestamp(self):
        """
        Creates a timestamp for the current local date in string format:
        year followed by zero-padded month and day
        :return: timestamp(string)
        """
        return datetime.datetime.now().strftime('%Y%m%d')

    def on_error(self, status_code):
        """
        Logs the status code reported by the stream
        :param status_code: status code passed in by the stream
        :return: nothing
        """
        # Use the same logging entry point as the rest of the class.
        logger.logging.error('Twitter Stream returned status code:%s',
                             status_code)
def __init__(self):
    """
    Set up the three helpers used by this object: the validator (built from
    pathToPickleFiles), the geolocation finder and the database handler
    (built from dbURL, dbPort, dbUser and dbPasswd).
    """
    self.validator = ValidatorClass(pathToPickleFiles)
    self.geo_finder = GeolocationFinder()

    db_settings = (dbURL, dbPort, dbUser, dbPasswd)
    self.database_handler = DatabaseHandler(*db_settings)
class DataCollector(StreamListener):
    """
    Stream listener that validates incoming tweets, resolves the user's
    location to coordinates and writes the resulting records through the
    database handler.
    """

    # Placeholder returned by get_data_from_json_data when the payload has no
    # usable 'text' field.
    _MISSING_TEXT = 'invalid text'
    # Texts on_data rejects without consulting the validator. 'invalid' is
    # the literal the guard has always compared against and is kept so that
    # existing behaviour does not change.
    _REJECTED_TEXTS = ('invalid', _MISSING_TEXT)

    def __init__(self):
        # Let the base listener initialise its own state before adding ours.
        super(DataCollector, self).__init__()
        self.validator = ValidatorClass(pathToPickleFiles)
        self.geo_finder = GeolocationFinder()
        self.database_handler = DatabaseHandler(dbURL, dbPort, dbUser,
                                                dbPasswd)

    def on_data(self, raw_data):
        """
        When Listener detects a tweet with the keywords this method is called
        to handle the tweet.
        Sequence:
        - Load the json data
        - Validate tweet
        - Store if valid

        TypeError and ValueError raised while processing are logged and
        swallowed.

        :param raw_data: raw JSON string for one stream message
        :return: nothing
        """
        try:
            # Load the raw data
            json_data = json.loads(raw_data)

            # Get some required details from json data
            _user_id, text, language, location, timestamp = \
                self.get_data_from_json_data(json_data)

            # Check if text in tweet is valid before processing
            if text in self._REJECTED_TEXTS:
                return
            if not self.validator.validate_text_from_tweet(text):
                return

            record = {'created': timestamp, 'user_language': language}

            # Check if tweet contains a valid location
            if self.validator.validate_location(location) \
                    and location != 'None':
                # get location details of user
                address, latitude, longitude = \
                    self.geo_finder.get_location(location)

                # If location has not returned None for lat and long,
                # construct and record the map point in database
                if self._has_coordinates(latitude, longitude):
                    self.add_to_record(address, latitude, longitude, record)
                    self.record_map_point(latitude, longitude, timestamp,
                                          text)

            # Check if language is english
            # NOTE(review): English tweets are stored whether or not a
            # location was resolved - confirm this nesting is intended.
            if self.language_is_english(language):
                self.database_handler.write_english_tweet_to_database(record)
        except (TypeError, ValueError):
            logger.logging.exception('Error during on_data method')

    @staticmethod
    def _has_coordinates(latitude, longitude):
        """Return True when neither value is None nor the string 'None'."""
        return all(value is not None and value != 'None'
                   for value in (latitude, longitude))

    def language_is_english(self, language):
        """
        Checks if the language provided is english
        :param language: language code string
        :return: boolean value True/False
        """
        return language in ('en', 'en-gb')

    def add_to_record(self, address, latitude, longitude, record):
        """
        Add location values to record, which is a dictionary (mutated in
        place)
        :param address: value stored under 'address'
        :param latitude: value stored under 'latitude'
        :param longitude: value stored under 'longitude'
        :param record: dictionary
        :return: nothing
        """
        record.update(address=address, latitude=latitude,
                      longitude=longitude)

    def record_map_point(self, latitude, longitude, timestamp, text):
        """
        Creates a record (dictionary) for a map point and calls the database
        handler to store it
        :param latitude: value stored under 'lat'
        :param longitude: value stored under 'long'
        :param timestamp: string value for timestamp, converted with int()
        :param text: string value for text
        :return: nothing
        :raises ValueError: if timestamp cannot be converted to an integer
        """
        map_point_record = {
            'date': int(timestamp),
            'lat': latitude,
            'long': longitude,
            'text': text,
        }
        self.database_handler.write_map_point_to_database(map_point_record)

    def _read_user_field(self, json_data, field, default, log_message):
        """Return json_data['user'][field]; on KeyError log and return default."""
        try:
            return json_data['user'][field]
        except KeyError:
            logger.logging.exception(log_message)
            return default

    def get_data_from_json_data(self, json_data):
        """
        Extracts appropriate data from json data; if a KeyError occurs the
        attribute is set to 'unknown' or None. A missing or non-string text
        is replaced by a placeholder that on_data rejects.
        :param json_data: decoded JSON message
        :return: user_id(string), text(string), user_language(string),
                 location(string or None), timestamp(string)
        """
        user_id = self._read_user_field(
            json_data, 'id_str', 'unknown',
            'KeyError while accessing user ID')
        user_language = self._read_user_field(
            json_data, 'lang', 'unknown',
            'KeyError while accessing user language')
        location = self._read_user_field(
            json_data, 'location', None,
            'KeyError while accessing user location')

        try:
            text = json_data['text'].lower()
        except (KeyError, AttributeError):
            # AttributeError covers a 'text' value without .lower() (e.g.
            # None). Use the placeholder so the tweet will not be accepted.
            text = self._MISSING_TEXT
            logger.logging.exception('KeyError while accessing tweet text')

        # Get time tweet picked up
        timestamp = self.get_timestamp()

        return user_id, text, user_language, location, timestamp

    def get_timestamp(self):
        """
        Creates a timestamp for the current local date in string format:
        year followed by zero-padded month and day
        :return: timestamp(string)
        """
        return datetime.datetime.now().strftime('%Y%m%d')

    def on_error(self, status_code):
        """
        Logs the status code reported by the stream
        :param status_code: status code passed in by the stream
        :return: nothing
        """
        # Use the same logging entry point as the rest of the class.
        logger.logging.error('Twitter Stream returned status code:%s',
                             status_code)
class GeolocationFinderTests(unittest.TestCase):
    """
    Tests for GeolocationFinder: cache lookup, fallback to the geolocator and
    extraction of address / latitude / longitude from a geolocation.
    """

    def setUp(self):
        """Create the finder under test with an empty cache and mock fixtures."""
        self.test_geolocation_finder = GeolocationFinder()
        # Every test starts from its own empty cache, so an entry stored by
        # one test (e.g. 'Dublin') cannot leak into a test that expects a
        # cache miss and make the result depend on execution order.
        self.test_geolocation_finder.location_cache = {}
        self.setup_mocks()

    def setup_mocks(self):
        """Create the mock geolocation fixtures shared by the tests."""
        self.attrs = {
            'address': 'test_address',
            'latitude': '0000',
            'longitude': '0000',
        }
        self.test_geolocation = Mock(**self.attrs)
        # A separate Mock with the same attributes, so tests can tell whether
        # the finder's geolocation was replaced by self.test_geolocation.
        self.test_geolocation_finder.geolocation = Mock(**self.attrs)

    def test_GetLocation_ReturnsNoneNoneNone_IfLocationIsNone(self):
        # Checks
        self.assertEqual((None, None, None),
                         self.test_geolocation_finder.get_location(None))

    def test_get_location_sets_geolocation_from_cache_When_geolocation_exists_in_cache(
            self):
        # Arrange
        self.test_geolocation_finder.location_cache['Dublin'] = \
            self.test_geolocation

        # Execute
        with patch.object(GeolocationFinder,
                          'set_geolocation_from_cache') as mock_method:
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        mock_method.assert_called_once_with('Dublin')

    def test_get_location_sets_geolocation_from_geolocator_when_not_present_in_cache(
            self):
        # Arrange: the cache is empty (reset in setUp), so 'Dublin' is a miss.

        # Execute
        with patch.object(GeolocationFinder,
                          'set_geolocation_from_geolocator',
                          return_value=self.test_geolocation) as mock_method:
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        mock_method.assert_called_once_with('Dublin')

    def test_get_location_sets_geolocation_from_geolocator_adds_geolocation_to_cache(
            self):
        # Arrange
        self.test_geolocation_finder.location_cache = {}

        # Execute
        with patch.object(Nominatim, 'geocode',
                          return_value=self.test_geolocation):
            self.test_geolocation_finder.get_location('Dublin')

        # Checks
        self.assertIn('Dublin', self.test_geolocation_finder.location_cache)

    def test_set_geolocation_from_cache_sets_the_geolocation(self):
        # Arrange
        self.test_geolocation_finder.location_cache['Dublin'] = \
            self.test_geolocation

        # Execute
        self.test_geolocation_finder.set_geolocation_from_cache('Dublin')

        # Checks
        self.assertEqual(self.test_geolocation_finder.geolocation,
                         self.test_geolocation)

    def test_set_geolocation_from_geolocator_sets_geolocation(self):
        # Execute
        with patch.object(Nominatim, 'geocode',
                          return_value=self.test_geolocation):
            self.test_geolocation_finder.set_geolocation_from_geolocator(
                'Dublin')

        # Checks
        self.assertEqual(self.test_geolocation_finder.geolocation,
                         self.test_geolocation)

    def test_set_geolocation_from_geolocator_sets_geolocation_to_None_when_GeocoderTimedOut_exception_raised(
            self):
        # Execute
        with patch.object(Nominatim, 'geocode', side_effect=GeocoderTimedOut):
            self.test_geolocation_finder.set_geolocation_from_geolocator(
                'Dublin')

        # Checks
        self.assertIsNone(self.test_geolocation_finder.geolocation)

    def test_get_addr_lat_long_raises_AttributeError_and_returns_None_None_None_when_no_geolocation_passed(
            self):
        # Arrange
        self.test_geolocation_finder.geolocation = None

        # Execute
        address, latitude, longitude = \
            self.test_geolocation_finder.get_addr_lat_long()

        # Checks
        self.assertIsNone(address)
        self.assertIsNone(latitude)
        self.assertIsNone(longitude)

    def test_get_addr_lat_long(self):
        # Execute
        address, latitude, longitude = \
            self.test_geolocation_finder.get_addr_lat_long()

        # Checks
        self.assertEqual(address, 'test_address')
        self.assertEqual(latitude, '0000')
        self.assertEqual(longitude, '0000')