def update_path_counts(model_name, date, test_stats_by_corpus):
    """Sum the path counts of every test corpus and store the totals.

    Parameters
    ----------
    model_name : str
        Model name, passed through to the database update call.
    date : str
        Date, passed through to the database update call.
    test_stats_by_corpus : dict
        Test statistics keyed by test corpus. Every value must contain a
        'test_round_summary' mapping; its optional 'path_stmt_counts'
        entry is converted with ``dict()`` (presumably a list of
        (statement key, count) pairs - confirm against the producer).
    """
    stmt_db = get_db('stmt')
    totals = Counter()
    # Only the statistics are needed; the corpus names are irrelevant here.
    for corpus_stats in test_stats_by_corpus.values():
        round_summary = corpus_stats['test_round_summary']
        count_pairs = round_summary.get('path_stmt_counts', [])
        # In-place Counter addition sums the counts per key and discards
        # keys whose running total is not positive.
        totals += Counter(dict(count_pairs))
    stmt_db.update_statements_path_counts(model_name, date, dict(totals))
def save_stmts(stmts, model_name, save_to_db):
    """Upload assembled statements to S3 and optionally add them to the db.

    A "latest" copy is written as both json and jsonl, and a dated copy
    is written as jsonl and as gzipped json.

    NOTE(review): ``self`` and ``bucket`` are not parameters of this
    function; presumably they come from an enclosing scope - confirm.

    Parameters
    ----------
    stmts : list
        Statements to serialize with ``stmts_to_json``.
    model_name : str
        Model name used to build the S3 keys and passed to the database.
    save_to_db : bool
        If truthy, the serialized statements are also added to the
        'stmt' database under the first 10 characters of ``self.date_str``.
    """
    serialized = stmts_to_json(stmts)
    key_prefix = f'assembled/{model_name}/'
    # Save a timestamped version and a generic latest version of files
    dated_key = f'{key_prefix}statements_{self.date_str}'
    latest_key = f'{key_prefix}latest_statements_{model_name}'

    # Generic "latest" copies, one per supported format.
    for fmt in ('json', 'jsonl'):
        target = f'{latest_key}.{fmt}'
        logger.info(f'Uploading assembled statements to {target}')
        save_json_to_s3(serialized, bucket, target, fmt)

    # Dated copies: plain jsonl first, then the gzipped json.
    jsonl_target = f'{dated_key}.jsonl'
    gzip_target = f'{dated_key}.gz'
    logger.info(f'Uploading assembled statements to {jsonl_target}')
    save_json_to_s3(serialized, bucket, jsonl_target, 'jsonl')
    logger.info(f'Uploading assembled statements to {gzip_target}')
    save_gzip_json_to_s3(serialized, bucket, gzip_target, 'json')

    if not save_to_db:
        return
    stmt_db = get_db('stmt')
    stmt_db.add_statements(model_name, self.date_str[:10], serialized)
def upload_results(self, test_corpus='large_corpus_tests',
                   test_data=None, upload_to_db=True,
                   bucket=EMMAA_BUCKET_NAME):
    """Upload test results and paths to S3, optionally updating the db.

    Parameters
    ----------
    test_corpus : str
        Test corpus name embedded in the S3 keys.
    test_data : object, optional
        Passed unchanged to ``self.results_to_json``.
    upload_to_db : bool
        If truthy, ``self.path_stmt_counts`` is also written to the
        'stmt' database under the first 10 characters of ``self.date_str``.
    bucket : str
        S3 bucket that receives the files.
    """
    json_dict, json_lines = self.results_to_json(test_data)

    model_name = self.model.name
    date_str = self.date_str
    paths_prefix = f'paths/{model_name}/'
    result_key = f'results/{model_name}/results_{test_corpus}_{date_str}.json'
    paths_key = f'{paths_prefix}paths_{test_corpus}_{date_str}.jsonl'
    latest_paths_key = f'{paths_prefix}{test_corpus}_latest_paths.jsonl'

    logger.info(f'Uploading test results to {result_key}')
    save_json_to_s3(json_dict, bucket, result_key)
    logger.info(f'Uploading test paths to {paths_key}')
    save_json_to_s3(json_lines, bucket, paths_key, save_format='jsonl')
    # The latest copy of the paths is uploaded without its own log message.
    save_json_to_s3(json_lines, bucket, latest_paths_key, 'jsonl')

    # Also save the path counts to the database if requested
    if not upload_to_db:
        return
    stmt_db = get_db('stmt')
    stmt_db.update_statements_path_counts(
        model_name, date_str[:10], self.path_stmt_counts)
import os
from emmaa.db import get_db
from emmaa.subscription.email_service import (
    send_email,
    notifications_sender_default,
    notifications_return_default,
)
from emmaa.subscription.notifications import get_user_query_delta

# ARN handed to send_email as both source and return ARN; None when the
# INDRA_BIO_ARN environment variable is not set.
indra_bio_ARN = os.environ.get('INDRA_BIO_ARN')


if __name__ == '__main__':
    # Email every subscribed user whose query delta has HTML content.
    db = get_db('primary')
    subject_line = 'You have an update to your queries on EMMAA'
    for user_email in db.get_subscribed_users():
        delta_str_msg, delta_html_msg = get_user_query_delta(db, user_email)
        # No HTML delta means there is nothing to report to this user.
        if not delta_html_msg:
            continue
        res = send_email(
            sender=notifications_sender_default,
            recipients=[user_email],
            subject=subject_line,
            body_text=delta_str_msg,
            body_html=delta_html_msg,
            source_arn=indra_bio_ARN,
            return_email=notifications_return_default,
            return_arn=indra_bio_ARN,
        )
def __init__(self, db=None, model_managers=None):
    """Store the database handle and the list of model managers.

    Parameters
    ----------
    db : object, optional
        Database handle to use. When None, ``get_db('primary')`` is
        called to obtain one.
    model_managers : list, optional
        Model managers to keep on the instance. Any falsy value
        (including None) is replaced by a new empty list.
    """
    self.db = get_db('primary') if db is None else db
    self.model_managers = model_managers or []