def process_twitter_data(
        status: Status, twitter_credentials: dict, add_sentiment: bool = True,
        add_bot_analysis: bool = True) -> TwitterDataOutput:
    """Build the status and user documents for a single tweet.

    The status is converted with ``TwitterDataProcessor.process_status`` and
    its author with ``TwitterDataProcessor.process_user``. Sentiment and
    Botometer results are attached to those documents when requested.

    Args:
        status: Status object to process; its ``user`` attribute is read.
        twitter_credentials: Credentials forwarded unchanged to
            ``TwitterDataProcessor.process_botometer_analysis``.
        add_sentiment: When true, attach ``sentiment_analysis`` to the
            status document.
        add_bot_analysis: When true, attach ``botometer_analysis`` to the
            user document.

    Returns:
        A ``TwitterDataOutput`` built from the attribute dictionaries of the
        status and user documents. If any step raises, the exception is
        logged and an output with empty ``status`` and ``user`` dicts is
        returned instead.
    """
    # Fallback result, returned unchanged when any step below fails.
    output: TwitterDataOutput = TwitterDataOutput(status={}, user={})
    try:
        # 1. Retrieve status document
        status_doc: StatusDoc = TwitterDataProcessor.process_status(
            status=status)
        response_sent: SentimentAnalysisOutput = SentimentAnalysisOutput()

        # 2. Add additional parameters related to sentiment analysis
        if add_sentiment:
            # Texts shorter than ``min_char`` are not analysed and keep the
            # default-constructed SentimentAnalysisOutput.
            # NOTE(review): confirm that attaching the default result for
            # short texts is the intended behaviour.
            if len(status_doc.text) >= TwitterDataProcessor.min_char:
                response_sent = TwitterDataProcessor.process_sentiment_analysis(
                    doc=status_doc.text)
            status_doc.sentiment_analysis = vars(response_sent)

        # 3. Get the user
        user: User = status.user
        user_doc: UserAccountDoc = TwitterDataProcessor.process_user(
            user=user)

        # 4. Add additional params
        if add_bot_analysis:
            response_botometer_analysis: BotometerAnalysisOutput = (
                TwitterDataProcessor.process_botometer_analysis(
                    user_id=user_doc.id,
                    twitter_credentials=twitter_credentials))
            user_doc.botometer_analysis = vars(response_botometer_analysis)

        # 5. Get the output
        output = TwitterDataOutput(
            status=vars(status_doc), user=vars(user_doc))
    except Exception as e:
        # Best-effort: the caller still receives the empty fallback output.
        # ``exception`` logs at ERROR level like before but keeps the
        # traceback so the failing step can be identified.
        logger.exception(e)
    return output
def on_status(self, status: Status):
    """Process and store an incoming status unless the storage check fails.

    The status id is looked up through ``self.check_data_in_storage``. When
    that check returns a truthy value (bound to ``non_exists`` — presumably
    meaning the status is not stored yet; confirm against
    ``check_data_in_storage``), the status is processed with
    ``self.process_status`` and handed to ``self.storage_data``. Otherwise
    nothing else is done.

    Args:
        status: Incoming status object; its ``id`` attribute is read.
    """
    # Read the id once instead of repeating the attribute lookup per use.
    status_id = status.id
    logger.info(f"1. Loading Status with ID {status_id}")

    # 1. Check whether the status is already in the storage
    non_exists: bool = self.check_data_in_storage(
        entity_id=status_id,
        storage=self.storage,
        collection_name=self.collection_names.get("status"),
        identifier_key="id")
    if not non_exists:
        return

    # 2. Process tweets and users
    logger.info(f"2. Pre-processing Status with ID {status_id}")
    data: TwitterDataOutput = self.process_status(
        status=status,
        add_sentiment=self.add_sentiment,
        add_bot_analysis=self.add_bot_analysis)

    # 3. Store the processed data
    logger.info(f"3. Storing Status with ID {status_id} in {self.storage.title()}")
    self.storage_data(
        data=data,
        collection_names=self.collection_names,
        storage=self.storage,
        mongodb_connector=self.mongodb_connector,
        elasticsearch_connector=self.elasticsearch_connector,
        identifier_key=self.identifier_key)