Пример #1
0
class CooccurrenceGraphDAO(GenericDAO, metaclass=Singleton):
    """DAO bound to the ``cooccurrence_graphs`` collection."""

    def __init__(self):
        """Pass the ``cooccurrence_graphs`` collection to the parent and create a logger."""
        super().__init__(Mongo().get().db.cooccurrence_graphs)
        self.logger = Logger(self.__class__.__name__)

    def store(self, graphs, start_date, end_date):
        """ Store main graph and all topic graphs into collection.

        One document is written per ``(key, graph)`` pair of ``graphs``, each
        tagged with ``start_date`` and ``end_date``. Nothing is written when
        ``graphs`` is empty.
        """
        documents = [{
            'topic_id': key,
            'graph': graph,
            'start_date': start_date,
            'end_date': end_date
        } for key, graph in graphs.items()]
        if not documents:
            # insert_many refuses an empty document list, so there is nothing to do
            self.logger.info('No cooccurrence graphs to store.')
            return
        self.collection.insert_many(documents)

    def get_all_sorted_topics(self):
        """ Return the distinct stored topic ids, sorted and converted to str. """
        graphs = self.get_all({}, {'topic_id': 1})
        # Sorting happens on the raw ids, before the str conversion
        topic_ids = {graph['topic_id'] for graph in graphs}
        return [str(topic) for topic in sorted(topic_ids)]

    def create_indexes(self):
        """ Create a descending index on ``topic_id``. """
        self.logger.info(
            'Creating topic_id index for collection cooccurrence_graphs.')
        Mongo().get().db.cooccurrence_graphs.create_index([
            ('topic_id', pymongo.DESCENDING)
        ])
Пример #2
0
 def __init__(self):
     """Create the logger, load the stored candidates and create the 'candidate_for_update' lock."""
     self.logger = Logger(self.__class__.__name__)
     self.updating_followers = set()
     # Load candidates from db; the former empty-list placeholder was
     # overwritten immediately, so the list is assigned only once
     self.candidates = CandidateDAO().all()
     ConcurrencyUtils().create_lock('candidate_for_update')
Пример #3
0
class CooccurrenceGraphDAO(GenericDAO, metaclass=Singleton):
    """DAO bound to the ``cooccurrence_graphs`` collection."""

    def __init__(self):
        """Pass the ``cooccurrence_graphs`` collection to the parent and create a logger."""
        graphs_collection = Mongo().get().db.cooccurrence_graphs
        super().__init__(graphs_collection)
        self.logger = Logger(type(self).__name__)

    def create_indexes(self):
        """Create a descending index on ``topic_id``."""
        self.logger.info('Creating topic_id index for collection cooccurrence_graphs.')
        index_keys = [('topic_id', pymongo.DESCENDING)]
        Mongo().get().db.cooccurrence_graphs.create_index(index_keys)
Пример #4
0
class RawFollowerDAO(GenericDAO, metaclass=Singleton):
    """DAO bound to the ``raw_followers`` collection."""

    def __init__(self):
        """Pass the ``raw_followers`` collection to the parent and create a logger."""
        followers_collection = Mongo().get().db.raw_followers
        super().__init__(followers_collection)
        self.logger = Logger(type(self).__name__)

    def create_indexes(self):
        """Create a descending index on ``has_tweets``."""
        self.logger.info(
            'Creating has_tweets index for collection raw_followers.')
        index_keys = [('has_tweets', pymongo.DESCENDING)]
        Mongo().get().db.raw_followers.create_index(index_keys)
Пример #5
0
def set_up_context(db_name, authorization, environment):
    """Configure logging and the Mongo connection, then create the indexes.

    ``authorization`` is placed verbatim between ``mongodb://`` and
    ``localhost:27017`` in the connection URI.
    """
    # Logging first, so the start-up message below is emitted
    Logger.set_up(environment)
    Logger(__name__).info(f'Starting application in environment {environment}')
    # Database settings
    app.config['MONGO_DBNAME'] = db_name
    mongo_uri = f'mongodb://{authorization}localhost:27017/{db_name}'
    app.config['MONGO_URI'] = mongo_uri
    Mongo().db.init_app(app)
    # Index creation runs inside the application context
    with app.app_context():
        create_indexes()
Пример #6
0
 def __init__(self):
     """Create the logger and load credentials from ``CredentialService.CREDENTIALS_PATH``.

     An ``IOError`` raised while opening or reading the file is logged and
     swallowed; ``self.credentials`` then keeps whatever was loaded so far.
     """
     self.logger = Logger(type(self).__name__)
     self.in_use = set()
     self.credentials = []
     # Each entry of the JSON file becomes one Credential object
     try:
         with open(CredentialService.CREDENTIALS_PATH, 'r') as credentials_file:
             for entry in json.load(credentials_file):
                 self.credentials.append(Credential(**entry))
     except IOError:
         self.logger.error('Credentials file do not found')
Пример #7
0
 def _run(self, executable, args_list, multiple=False):
     """Run ``executable`` for ``args_list`` in a thread pool and return the results.

     The pool size comes from the ``max_pool_workers`` configuration value.
     ``multiple`` selects which private helper builds the futures. Results
     are collected in the order yielded by ``as_completed``.
     """
     Logger(self.__class__.__name__).info('Starting asynchronous thread pool.')
     pool_size = ConfigurationManager().get_int('max_pool_workers')
     with ThreadPoolExecutor(max_workers=pool_size) as executor:
         # TODO: This could be avoided but all single-param calls should send a list of one element
         if multiple:
             build_futures = self.__create_futures_multiple_args
         else:
             build_futures = self.__create_futures
         futures = build_futures(executor, executable, args_list)
         results = []
         for finished in as_completed(futures):
             results.append(finished.result())
     Logger(self.__class__.__name__).info(
         'Finished executing tasks in asynchronous thread pool.')
     return results
Пример #8
0
class ContextInitializer:
    """Entry point that instantiates the application's services and DAOs."""

    LOGGER = Logger('ContextInitializer')

    @classmethod
    def initialize_context(cls):
        """ Instantiate every context component, in a fixed order, Spring-style.

        The created instances are not kept here; each constructor is simply called once.
        """
        cls.LOGGER.info('Instantiating context services and components.')
        # Order matters: it is the order in which the constructors run
        components = (
            ConfigurationManager,
            ConcurrencyUtils,
            Scheduler,
            CandidateDAO,
            RawFollowerDAO,
            CandidatesFollowersDAO,
            CredentialService,
            CandidateService,
            FollowerUpdateService,
            TweetUpdateService,
            FollowersQueueService,
        )
        for component in components:
            component()
Пример #9
0
 def __init__(self):
     """Pass the ``candidates_followers`` collection to the parent and create a logger."""
     collection = Mongo().get().db.candidates_followers
     super(CandidatesFollowersDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #10
0
 def __init__(self):
     """Pass the ``topic_usage`` collection to the parent and create a logger."""
     collection = Mongo().get().db.topic_usage
     super(TopicUsageDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #11
0
 def get_logger(cls):
     """Return a ``Logger`` named 'UserTopicService'."""
     logger_name = 'UserTopicService'
     return Logger(logger_name)
Пример #12
0
 def __init__(self):
     """Pass the ``hashtags_topics`` collection to the parent and create a logger."""
     collection = Mongo().get().db.hashtags_topics
     super(HashtagsTopicsDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #13
0
 def __init__(self):
     """Pass the ``users_friends`` collection to the parent and create a logger."""
     collection = Mongo().get().db.users_friends
     super(UsersFriendsDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #14
0
 def __init__(self):
     """Pass the ``hashtag_entropy`` collection to the parent and create a logger."""
     collection = Mongo().get().db.hashtag_entropy
     super(HashtagEntropyDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #15
0
class CandidateService(metaclass=Singleton):
    """Hold the in-memory candidate list and hand out candidates for follower updates."""

    # Name of the lock that serializes candidate polling across threads
    _UPDATE_LOCK = 'candidate_for_update'

    def __init__(self):
        """Create the logger, load the stored candidates and create the polling lock."""
        self.logger = Logger(self.__class__.__name__)
        self.updating_followers = set()
        # Load candidates from db and create objects to access their elements
        self.candidates = CandidateDAO().all()
        ConcurrencyUtils().create_lock(self._UPDATE_LOCK)

    def get_all(self):
        """ Returns all candidates currently in the list. """
        return self.candidates

    def get_for_follower_updating(self):
        """ Polls a candidate for updating its follower list.

        Returns the first candidate that is not currently being updated and
        whose followers were not updated today, and marks it as being updated.

        Raises:
            FollowerUpdatingNotNecessaryError: if no candidate qualifies.
        """
        # Lock to avoid concurrency issues when retrieving candidates across threads
        ConcurrencyUtils().acquire_lock(self._UPDATE_LOCK)
        try:
            for candidate in self.candidates:
                # Skip candidates that are being updated right now
                if candidate in self.updating_followers:
                    continue
                # Skip candidates already updated today
                if DateUtils.is_today(candidate.last_updated_followers):
                    continue
                self.logger.info(
                    f'Returning candidate {candidate.screen_name} for follower retrieval.'
                )
                self.updating_followers.add(candidate)
                return candidate
        finally:
            # Always unlock, even if the date check or logging raises;
            # otherwise every later poll would block on this lock
            ConcurrencyUtils().release_lock(self._UPDATE_LOCK)
        raise FollowerUpdatingNotNecessaryError()

    def finish_follower_updating(self, candidate):
        """ Unlock user for follower updating and update last updating time.

        Raises:
            CandidateCurrentlyAvailableForUpdateError: if ``candidate`` is not
                in the currently-updating set.
        """
        if candidate not in self.updating_followers:
            raise CandidateCurrentlyAvailableForUpdateError(
                candidate.screen_name)
        # Update last updated followers date
        self.logger.info(
            f'Removing candidate {candidate.screen_name} from currently updating set.'
        )
        candidate.last_updated_followers = datetime.now()
        CandidateDAO().overwrite(candidate)
        # Remove from set to not be polled again
        self.updating_followers.remove(candidate)

    def add_candidate(self, screen_name, nickname=None):
        """ Add a candidate with given screen name and nickname to the database and to the json file.

        Raises:
            CandidateAlreadyExistsError: if a candidate with ``screen_name``
                is already stored.
        """
        try:
            CandidateDAO().find(screen_name)
        except NonExistentCandidateError:
            self.logger.info(f'Adding candidate {screen_name} to database.')
            candidate = Candidate(screen_name=screen_name, nickname=nickname)
            # Store in database
            CandidateDAO().save(candidate)
            # Update json resource
            CandidateDAO().update_json_resource(candidate)
            # Update current structure
            self.candidates.append(candidate)
            return
        raise CandidateAlreadyExistsError(screen_name)
Пример #16
0
 def get_logger(cls):
     """Return a ``Logger`` named 'FollowerSupportService'."""
     logger_name = 'FollowerSupportService'
     return Logger(logger_name)
Пример #17
0
 def get_logger(cls):
     """Return a ``Logger`` named 'HashtagCooccurrenceService'."""
     logger_name = 'HashtagCooccurrenceService'
     return Logger(logger_name)
Пример #18
0
 def __init__(self):
     """Pass the ``hashtag_usage`` collection to the parent and create a logger."""
     collection = Mongo().get().db.hashtag_usage
     super(HashtagUsageDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #19
0
 def __init__(self):
     """Pass the ``raw_followers`` collection to the parent and create a logger."""
     collection = Mongo().get().db.raw_followers
     super(RawFollowerDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #20
0
 def __init__(self):
     """Pass the ``community_strength`` collection to the parent and create a logger."""
     collection = Mongo().get().db.community_strength
     super(CommunityStrengthDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #21
0
class CandidateDAO(GenericDAO, metaclass=Singleton):
    """DAO bound to the ``candidates`` collection, plus the candidates JSON resource file."""

    FILE_PATH = f"{abspath(join(dirname(__file__), '../../'))}/resources/candidates.json"

    def __init__(self):
        """Pass the ``candidates`` collection to the parent and create a logger."""
        candidates_collection = Mongo().get().db.candidates
        super().__init__(candidates_collection)
        self.logger = Logger(type(self).__name__)

    @staticmethod
    def _candidate_from_document(document):
        """Turn a stored document into a Candidate, copying ``_id`` into ``screen_name``."""
        document['screen_name'] = document['_id']
        return Candidate(**document)

    def find(self, screen_name):
        """ Return the candidate stored under ``screen_name``.

        Raises:
            NonExistentCandidateError: if no such document is found.
        """
        document = self.get_first({'_id': screen_name})
        if document is None:
            raise NonExistentCandidateError(screen_name)
        # DB format -> DTO format
        return self._candidate_from_document(document)

    def overwrite(self, candidate):
        """ Update the candidate's nickname and last follower-update date (not its screen name). """
        selector = {'_id': candidate.screen_name}
        new_values = {
            'nickname': candidate.nickname,
            'last_updated_followers': candidate.last_updated_followers
        }
        self.update_first(selector, new_values)

    def save(self, candidate):
        """ Insert the candidate, using its screen name as ``_id``; returns what ``insert`` returns. """
        # DTO format -> DB format
        return self.insert({
            '_id': candidate.screen_name,
            'nickname': candidate.nickname,
            'last_updated_followers': candidate.last_updated_followers
        })

    def all(self):
        """ Return every stored candidate as a list of Candidate objects. """
        # DB format -> DTO format, one document at a time
        return [
            self._candidate_from_document(document)
            for document in self.get_all()
        ]

    def create_indexes(self):
        """ No-op: this collection has no indexes to create. """
        pass

    def create_base_entries(self):
        """ Fill the collection from the JSON resource file, only when it holds no documents. """
        already_populated = self.get_all().count() > 0
        if already_populated:
            return
        self.logger.info('Loading candidates from file into database.')
        with open(CandidateDAO.FILE_PATH, 'r') as resource:
            entries = json.load(resource)
        # One document per file entry, keyed by screen name
        for entry in entries:
            self.insert({
                '_id': entry['screen_name'],
                'nickname': entry['nickname']
            })

    def update_json_resource(self, candidate):
        """ Append the candidate's screen name and nickname to the JSON resource file. """
        self.logger.info(
            f'Storing candidate {candidate.screen_name} into file.')
        with open(CandidateDAO.FILE_PATH, 'r') as resource:
            entries = json.load(resource)
        new_entry = {
            'screen_name': candidate.screen_name,
            'nickname': candidate.nickname
        }
        entries.append(new_entry)
        # Rewrite the whole file with the extended list
        with open(CandidateDAO.FILE_PATH, 'w') as resource:
            json.dump(entries, resource)

    def get_required_candidates(self):
        """ Return two dicts built from the candidates that have an ``index`` field.

        The first maps candidate ``_id`` to its ``index``; the second maps
        ``index`` to the candidate's ``group``.
        """
        index_by_id = {}
        group_by_index = {}
        # Single pass: both dicts are filled from the same query result
        for document in self.get_all({'index': {'$exists': True}}):
            index_by_id[document['_id']] = document['index']
            group_by_index[document['index']] = document['group']
        return index_by_id, group_by_index
Пример #22
0
 def __init__(self):
     """Pass the ``raw_tweets`` collection to the parent and create a logger."""
     collection = Mongo().get().db.raw_tweets
     super(RawTweetDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #23
0
 def __init__(self):
     """Pass the ``users_similarities`` collection to the parent and create a logger."""
     collection = Mongo().get().db.users_similarities
     super(SimilarityDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #24
0
 def get_logger(cls):
     """Return a ``Logger`` named 'TweetUpdateService'."""
     logger_name = 'TweetUpdateService'
     return Logger(logger_name)
Пример #25
0
class CredentialService(metaclass=Singleton):
    """Load credentials from a JSON file and track which service is using which credential."""

    CREDENTIALS_PATH = f"{abspath(join(dirname(__file__), '../../..'))}/twitter_credentials.json"

    def __init__(self):
        """Create the logger and load credentials from ``CREDENTIALS_PATH``.

        An ``IOError`` raised while opening or reading the file is logged and
        swallowed; ``self.credentials`` then keeps whatever was loaded so far.
        """
        self.logger = Logger(type(self).__name__)
        self.in_use = set()
        self.credentials = []
        # Each entry of the JSON file becomes one Credential object
        try:
            with open(CredentialService.CREDENTIALS_PATH, 'r') as credentials_file:
                for entry in json.load(credentials_file):
                    self.credentials.append(Credential(**entry))
        except IOError:
            self.logger.error('Credentials file do not found')

    @staticmethod
    def _usage_key(credential_id, service_id):
        """Build the ``in_use`` entry for a credential/service pair."""
        return f"{credential_id}-{service_id}"

    def get_all_credentials_for_service(self, service_id):
        """ Mark every credential as in use by the service and return them all.

        Raises:
            CredentialsAlreadyInUseError: if the service already holds any credential.
        """
        self.logger.info(
            f'Returning all credentials for service {service_id}.')
        keys = [
            self._usage_key(credential.id, service_id)
            for credential in self.credentials
        ]
        # Check everything before marking anything, so no rollback is ever needed
        if any(key in self.in_use for key in keys):
            raise CredentialsAlreadyInUseError(service_id)
        self.logger.info('Checked credentials')
        self.in_use.update(keys)
        return self.credentials

    def get_credential_for_service(self, service_id):
        """ Return the first credential the service is not using yet, marking it as in use.

        Raises:
            NoAvailableCredentialsError: if the service already uses every credential.
        """
        for credential in self.credentials:
            key = self._usage_key(credential.id, service_id)
            if key in self.in_use:
                continue
            self.logger.info(
                f'Returning credential {credential.id} for service {service_id}.'
            )
            self.in_use.add(key)
            return credential
        raise NoAvailableCredentialsError(service_id)

    def get_credential_with_id_for_service(self, credential_id, service_id):
        """ Return the credential with the given id if the service is not using it yet.

        Raises:
            NoAvailableCredentialsError: if no credential has that id, or the
                service already uses it.
        """
        for credential in self.credentials:
            if not credential_id == credential.id:
                continue
            key = self._usage_key(credential.id, service_id)
            if key in self.in_use:
                continue
            self.logger.info(
                f'Returning credential {credential.id} for service {service_id}.'
            )
            self.in_use.add(key)
            return credential
        raise NoAvailableCredentialsError(service_id)

    def unlock_credential(self, credential_id, service_id):
        """ Release a credential previously handed to the service.

        Raises:
            CredentialCurrentlyAvailableError: if the pair is not marked as in use.
        """
        key = self._usage_key(credential_id, service_id)
        if key not in self.in_use:
            raise CredentialCurrentlyAvailableError(key)
        self.logger.info(
            f'Unlocking credential {credential_id} for service {service_id}.')
        self.in_use.remove(key)
Пример #26
0
 def __init__(self):
     """Pass the ``showable_graphs`` collection to the parent and create a logger."""
     collection = Mongo().get().db.showable_graphs
     super(ShowableGraphDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #27
0
 def get_logger(cls):
     """Return a ``Logger`` named after ``cls``."""
     logger_name = cls.__name__
     return Logger(logger_name)
Пример #28
0
 def __init__(self):
     """Pass the ``cooccurrence_graphs`` collection to the parent and create a logger."""
     collection = Mongo().get().db.cooccurrence_graphs
     super(CooccurrenceGraphDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)
Пример #29
0
 def get_logger(cls):
     """Return a ``Logger`` named 'CSVUtils'."""
     logger_name = 'CSVUtils'
     return Logger(logger_name)
Пример #30
0
 def __init__(self):
     """Pass the ``party_relationships`` collection to the parent and create a logger."""
     collection = Mongo().get().db.party_relationships
     super(PartyRelationshipsDAO, self).__init__(collection)
     self.logger = Logger(type(self).__name__)