class CooccurrenceGraphDAO(GenericDAO, metaclass=Singleton):
    """ DAO for co-occurrence graphs, keyed by topic id. """

    def __init__(self):
        super(CooccurrenceGraphDAO, self).__init__(Mongo().get().db.cooccurrence_graphs)
        self.logger = Logger(self.__class__.__name__)

    def store(self, graphs, start_date, end_date):
        """ Store main graph and all topic graphs into collection. """
        self.collection.insert_many([
            {'topic_id': topic_id,
             'graph': graph,
             'start_date': start_date,
             'end_date': end_date}
            for topic_id, graph in graphs.items()
        ])

    def get_all_sorted_topics(self):
        """ Return the distinct topic ids, sorted and stringified. """
        documents = self.get_all({}, {'topic_id': 1})
        unique_ids = {document['topic_id'] for document in documents}
        return [str(topic_id) for topic_id in sorted(unique_ids)]

    def create_indexes(self):
        """ Create the topic_id index on the backing collection. """
        self.logger.info(
            'Creating topic_id index for collection cooccurrence_graphs.')
        Mongo().get().db.cooccurrence_graphs.create_index([
            ('topic_id', pymongo.DESCENDING)
        ])
def __init__(self):
    """ Set up logging, the in-progress follower-update set, and the candidate list.

    Candidates are loaded once from the database; a named lock guards concurrent
    polling of candidates for follower updates.
    """
    self.logger = Logger(self.__class__.__name__)
    self.updating_followers = set()
    # Load candidates from db and create objects to access their elements.
    # (A redundant `self.candidates = []` dead store that was immediately
    # overwritten by this load has been removed.)
    self.candidates = CandidateDAO().all()
    ConcurrencyUtils().create_lock('candidate_for_update')
class CooccurrenceGraphDAO(GenericDAO, metaclass=Singleton):
    """ DAO for co-occurrence graphs stored in the cooccurrence_graphs collection. """

    def __init__(self):
        super(CooccurrenceGraphDAO, self).__init__(Mongo().get().db.cooccurrence_graphs)
        self.logger = Logger(self.__class__.__name__)

    def create_indexes(self):
        """ Create the topic_id index on the backing collection. """
        self.logger.info('Creating topic_id index for collection cooccurrence_graphs.')
        index_spec = [('topic_id', pymongo.DESCENDING)]
        Mongo().get().db.cooccurrence_graphs.create_index(index_spec)
class RawFollowerDAO(GenericDAO, metaclass=Singleton):
    """ DAO for raw follower documents. """

    def __init__(self):
        super(RawFollowerDAO, self).__init__(Mongo().get().db.raw_followers)
        self.logger = Logger(self.__class__.__name__)

    def create_indexes(self):
        """ Create the has_tweets index on the backing collection. """
        self.logger.info(
            'Creating has_tweets index for collection raw_followers.')
        index_spec = [('has_tweets', pymongo.DESCENDING)]
        Mongo().get().db.raw_followers.create_index(index_spec)
def set_up_context(db_name, authorization, environment):
    """ Configure logging and the Mongo connection, then create DB indexes. """
    # Configure logger
    Logger.set_up(environment)
    Logger(__name__).info(f'Starting application in environment {environment}')
    # Configure database
    mongo_uri = f'mongodb://{authorization}localhost:27017/{db_name}'
    app.config['MONGO_DBNAME'] = db_name
    app.config['MONGO_URI'] = mongo_uri
    Mongo().db.init_app(app)
    # Index creation needs an active application context
    with app.app_context():
        create_indexes()
def __init__(self):
    """ Load Twitter credentials from the JSON resource file.

    Initializes the in-use lock set and the credential list; a missing
    credentials file is logged as an error instead of crashing start-up.
    """
    self.logger = Logger(self.__class__.__name__)
    self.in_use = set()
    self.credentials = []
    # Load credentials file and create objects to access their elements
    try:
        with open(CredentialService.CREDENTIALS_PATH, 'r') as file:
            loaded = json.load(file)
            for value in loaded:
                self.credentials.append(Credential(**value))
    except IOError:
        # Fixed log message grammar ('do not found' -> 'not found')
        self.logger.error('Credentials file not found')
def _run(self, executable, args_list, multiple=False):
    """ Run the executable over args_list in a thread pool and gather results.

    When multiple is True each element of args_list is unpacked as several
    arguments; otherwise each element is passed as a single argument.
    """
    logger = Logger(self.__class__.__name__)
    logger.info('Starting asynchronous thread pool.')
    max_workers = ConfigurationManager().get_int('max_pool_workers')
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # TODO: This could be avoided but all single-param calls should send a list of one element
        if multiple:
            futures = self.__create_futures_multiple_args(executor, executable, args_list)
        else:
            futures = self.__create_futures(executor, executable, args_list)
        results = [future.result() for future in as_completed(futures)]
    logger.info('Finished executing tasks in asynchronous thread pool.')
    return results
class ContextInitializer:
    """ Eagerly instantiates every singleton service at application start-up. """

    LOGGER = Logger('ContextInitializer')

    @classmethod
    def initialize_context(cls):
        """ Create instances of all environment services in a Spring-like fashion."""
        cls.LOGGER.info('Instantiating context services and components.')
        # Instantiation order matters only in that it mirrors the original wiring
        components = (ConfigurationManager, ConcurrencyUtils, Scheduler,
                      CandidateDAO, RawFollowerDAO, CandidatesFollowersDAO,
                      CredentialService, CandidateService, FollowerUpdateService,
                      TweetUpdateService, FollowersQueueService)
        for component in components:
            component()
def __init__(self):
    """ Wire the DAO to the candidates_followers collection and set up logging. """
    super().__init__(Mongo().get().db.candidates_followers)
    self.logger = Logger(type(self).__name__)
def __init__(self):
    """ Wire the DAO to the topic_usage collection and set up logging. """
    super().__init__(Mongo().get().db.topic_usage)
    self.logger = Logger(type(self).__name__)
def get_logger(cls):
    """ Return the logger used by this service. """
    logger = Logger('UserTopicService')
    return logger
def __init__(self):
    """ Wire the DAO to the hashtags_topics collection and set up logging. """
    super().__init__(Mongo().get().db.hashtags_topics)
    self.logger = Logger(type(self).__name__)
def __init__(self):
    """ Wire the DAO to the users_friends collection and set up logging. """
    super().__init__(Mongo().get().db.users_friends)
    self.logger = Logger(type(self).__name__)
def __init__(self):
    """ Wire the DAO to the hashtag_entropy collection and set up logging. """
    super().__init__(Mongo().get().db.hashtag_entropy)
    self.logger = Logger(type(self).__name__)
class CandidateService(metaclass=Singleton):
    """ Singleton coordinating candidate polling for follower updates.

    Tracks which candidates are currently being updated and serializes
    candidate selection with the 'candidate_for_update' lock.
    """

    def __init__(self):
        self.logger = Logger(self.__class__.__name__)
        # Candidates whose follower lists are currently being updated
        self.updating_followers = set()
        self.candidates = []
        # Load candidates from db and create objects to access their elements
        self.candidates = CandidateDAO().all()
        ConcurrencyUtils().create_lock('candidate_for_update')

    def get_all(self):
        """ Returns all candidates currently in the list. """
        return self.candidates

    def get_for_follower_updating(self):
        """ Polls a candidate for updating its follower list.

        Raises FollowerUpdatingNotNecessaryError when no candidate needs
        updating. The lock is released on every exit path.
        """
        # Lock to avoid concurrency issues when retrieving candidates across threads
        ConcurrencyUtils().acquire_lock('candidate_for_update')
        for candidate in self.candidates:
            # We will only return a candidate if it was not updated today and is not being currently updated
            if candidate not in self.updating_followers and not DateUtils.is_today(
                    candidate.last_updated_followers):
                self.logger.info(
                    f'Returning candidate {candidate.screen_name} for follower retrieval.'
                )
                # Mark as in-progress before releasing the lock so no other
                # thread can pick the same candidate
                self.updating_followers.add(candidate)
                # Unlock
                ConcurrencyUtils().release_lock('candidate_for_update')
                return candidate
        # Unlock
        ConcurrencyUtils().release_lock('candidate_for_update')
        raise FollowerUpdatingNotNecessaryError()

    def finish_follower_updating(self, candidate):
        """ Unlock user for follower updating and update last updating time.

        Raises CandidateCurrentlyAvailableForUpdateError if the candidate was
        not marked as being updated.
        """
        if candidate not in self.updating_followers:
            raise CandidateCurrentlyAvailableForUpdateError(
                candidate.screen_name)
        # Update last updated followers date
        self.logger.info(
            f'Removing candidate {candidate.screen_name} from currently updating set.'
        )
        candidate.last_updated_followers = datetime.now()
        CandidateDAO().overwrite(candidate)
        # Remove from set to not be polled again
        self.updating_followers.remove(candidate)

    def add_candidate(self, screen_name, nickname=None):
        """ Add a candidate with given screen name and nickname to the database and to the json file.

        Raises CandidateAlreadyExistsError if the screen name is already stored.
        """
        try:
            CandidateDAO().find(screen_name)
        except NonExistentCandidateError:
            # Not found means we can safely add it
            self.logger.info(f'Adding candidate {screen_name} to database.')
            candidate = Candidate(**{
                'screen_name': screen_name,
                'nickname': nickname
            })
            # Store in database
            CandidateDAO().save(candidate)
            # Update json resource
            CandidateDAO().update_json_resource(candidate)
            # Update current structure
            self.candidates.append(candidate)
            return
        raise CandidateAlreadyExistsError(screen_name)
def get_logger(cls):
    """ Return the logger used by this service. """
    logger = Logger('FollowerSupportService')
    return logger
def get_logger(cls):
    """ Return the logger used by this service. """
    logger = Logger('HashtagCooccurrenceService')
    return logger
def __init__(self):
    """ Wire the DAO to the hashtag_usage collection and set up logging. """
    super().__init__(Mongo().get().db.hashtag_usage)
    self.logger = Logger(type(self).__name__)
def __init__(self):
    """ Wire the DAO to the raw_followers collection and set up logging. """
    super().__init__(Mongo().get().db.raw_followers)
    self.logger = Logger(type(self).__name__)
def __init__(self):
    """ Wire the DAO to the community_strength collection and set up logging. """
    super().__init__(Mongo().get().db.community_strength)
    self.logger = Logger(type(self).__name__)
class CandidateDAO(GenericDAO, metaclass=Singleton):
    """ DAO for candidate documents, mirrored into a JSON resource file. """

    FILE_PATH = f"{abspath(join(dirname(__file__), '../../'))}/resources/candidates.json"

    def __init__(self):
        super(CandidateDAO, self).__init__(Mongo().get().db.candidates)
        self.logger = Logger(self.__class__.__name__)

    def find(self, screen_name):
        """ Get user with given screen name.

        Raises NonExistentCandidateError when no document matches.
        """
        as_dict = self.get_first({'_id': screen_name})
        if as_dict is None:
            raise NonExistentCandidateError(screen_name)
        # Transform from DB format to DTO format
        as_dict['screen_name'] = as_dict['_id']
        return Candidate(**as_dict)

    def overwrite(self, candidate):
        """ Update candidate's fields (except for screen name). """
        self.update_first(
            {'_id': candidate.screen_name}, {
                'nickname': candidate.nickname,
                'last_updated_followers': candidate.last_updated_followers
            })

    def save(self, candidate):
        """ Store candidate. """
        # Transform from DTO format to DB format
        to_insert = {
            '_id': candidate.screen_name,
            'nickname': candidate.nickname,
            'last_updated_followers': candidate.last_updated_followers
        }
        return self.insert(to_insert)

    def all(self):
        """ Get all currently stored candidates. """
        candidates = []
        as_dict_list = self.get_all()
        for as_dict in as_dict_list:
            # Transform from DB format to DTO format
            as_dict['screen_name'] = as_dict['_id']
            candidates.append(Candidate(**as_dict))
        return candidates

    def create_indexes(self):
        # There are no indexes to create for this collection
        pass

    def create_base_entries(self):
        """ Seed the collection from the JSON resource file if it is empty. """
        # Check if collection is empty. The previous `self.get_all().count()`
        # relied on Cursor.count(), which was deprecated in PyMongo 3.7 and
        # removed in PyMongo 4; count_documents is the supported API.
        if self.collection.count_documents({}) > 0:
            return
        # Load candidates
        self.logger.info('Loading candidates from file into database.')
        with open(CandidateDAO.FILE_PATH, 'r') as file:
            candidates = json.load(file)
        # Store entries
        for candidate in candidates:
            # Transform for database format
            to_insert = {
                '_id': candidate['screen_name'],
                'nickname': candidate['nickname']
            }
            self.insert(to_insert)

    def update_json_resource(self, candidate):
        """ Add candidate to json file. """
        self.logger.info(
            f'Storing candidate {candidate.screen_name} into file.')
        with open(CandidateDAO.FILE_PATH, 'r') as file:
            candidates = json.load(file)
        # Append new candidate
        candidates.append({
            'screen_name': candidate.screen_name,
            'nickname': candidate.nickname
        })
        # Write to file
        with open(CandidateDAO.FILE_PATH, 'w') as file:
            json.dump(candidates, file)

    def get_required_candidates(self):
        """ Retrieve dictionary like: {candidate: index}.

        Returns a pair (candidate_index, candidate_group) built from all
        documents that carry an 'index' field.
        """
        candidates = self.get_all({'index': {'$exists': True}})
        candidate_index = {}
        candidate_group = {}
        for candidate in candidates:
            candidate_index[candidate['_id']] = candidate['index']
            candidate_group[candidate['index']] = candidate['group']
        return candidate_index, candidate_group
def __init__(self):
    """ Wire the DAO to the raw_tweets collection and set up logging. """
    super().__init__(Mongo().get().db.raw_tweets)
    self.logger = Logger(type(self).__name__)
def __init__(self):
    """ Wire the DAO to the users_similarities collection and set up logging. """
    super().__init__(Mongo().get().db.users_similarities)
    self.logger = Logger(type(self).__name__)
def get_logger(cls):
    """ Return the logger used by this service. """
    logger = Logger('TweetUpdateService')
    return logger
class CredentialService(metaclass=Singleton):
    """ Singleton managing Twitter API credentials and per-service locking.

    Locked credentials are tracked in `in_use` as keys of the form
    '<credential_id>-<service_id>'.
    """

    CREDENTIALS_PATH = f"{abspath(join(dirname(__file__), '../../..'))}/twitter_credentials.json"

    def __init__(self):
        self.logger = Logger(self.__class__.__name__)
        self.in_use = set()
        self.credentials = []
        # Load credentials file and create objects to access their elements
        try:
            with open(CredentialService.CREDENTIALS_PATH, 'r') as file:
                loaded = json.load(file)
                for value in loaded:
                    self.credentials.append(Credential(**value))
        except IOError:
            # Fixed log message grammar ('do not found' -> 'not found')
            self.logger.error('Credentials file not found')

    def get_all_credentials_for_service(self, service_id):
        """ Return all credentials for a given service.

        Raises CredentialsAlreadyInUseError if any credential is already
        locked for this service.
        """
        self.logger.info(
            f'Returning all credentials for service {service_id}.')
        # Check if some credential has already been assigned
        for credential in self.credentials:
            if f"{credential.id}-{service_id}" in self.in_use:
                raise CredentialsAlreadyInUseError(service_id)
        self.logger.info('Checked credentials')
        # Store in the in use set. We iterate twice because the number of credentials is small and it is easier than
        # doing rollbacks with the already stored credentials if we need to raise an exception
        for credential in self.credentials:
            self.in_use.add(f"{credential.id}-{service_id}")
        return self.credentials

    def get_credential_for_service(self, service_id):
        """ Get credential if current service is not using all of the available credentials.

        Raises NoAvailableCredentialsError when every credential is locked.
        """
        for credential in self.credentials:
            if f"{credential.id}-{service_id}" not in self.in_use:
                self.logger.info(
                    f'Returning credential {credential.id} for service {service_id}.'
                )
                self.in_use.add(f"{credential.id}-{service_id}")
                return credential
        raise NoAvailableCredentialsError(service_id)

    def get_credential_with_id_for_service(self, credential_id, service_id):
        """ Get credential if current service is not using all of the available credentials.

        Only matches the credential with the given id; raises
        NoAvailableCredentialsError if it is missing or locked.
        """
        for credential in self.credentials:
            if credential_id == credential.id and f"{credential.id}-{service_id}" not in self.in_use:
                self.logger.info(
                    f'Returning credential {credential.id} for service {service_id}.'
                )
                self.in_use.add(f"{credential.id}-{service_id}")
                return credential
        raise NoAvailableCredentialsError(service_id)

    def unlock_credential(self, credential_id, service_id):
        """ Unlock credential for a given service.

        Raises CredentialCurrentlyAvailableError if it was not locked.
        """
        key = f'{credential_id}-{service_id}'
        if key not in self.in_use:
            raise CredentialCurrentlyAvailableError(key)
        self.logger.info(
            f'Unlocking credential {credential_id} for service {service_id}.')
        self.in_use.remove(key)
def __init__(self):
    """ Wire the DAO to the showable_graphs collection and set up logging. """
    super().__init__(Mongo().get().db.showable_graphs)
    self.logger = Logger(type(self).__name__)
def get_logger(cls):
    """ Return a logger named after the concrete class. """
    logger = Logger(cls.__name__)
    return logger
def __init__(self):
    """ Wire the DAO to the cooccurrence_graphs collection and set up logging. """
    super().__init__(Mongo().get().db.cooccurrence_graphs)
    self.logger = Logger(type(self).__name__)
def get_logger(cls):
    """ Return the logger used by this utility. """
    logger = Logger('CSVUtils')
    return logger
def __init__(self):
    """ Wire the DAO to the party_relationships collection and set up logging. """
    super().__init__(Mongo().get().db.party_relationships)
    self.logger = Logger(type(self).__name__)