def main(string_args: Optional[list[str]] = None) -> None:
    """Clean all support tickets marked for deletion.

    Args:
        string_args: command-line arguments; None lets argparse read sys.argv.
    """
    user_db = mongo.get_connections_from_env().user_db
    parser = argparse.ArgumentParser(
        description='Clean support tickets from the database.')
    report.add_report_arguments(parser)
    args = parser.parse_args(string_args)
    # Abort silently if report/Sentry setup declines to run (e.g. missing config).
    if not report.setup_sentry_logging(args):
        return
    instant = proto.datetime_to_json_string(now.get())
    # Pull every support ticket whose deleteAfter timestamp is already past.
    result = user_db.user.update_many(
        {}, {'$pull': {
            'supportTickets': {'deleteAfter': {'$lt': instant}}
        }})
    logging.info('Removed deprecated support tickets for %d users.', result.modified_count)
    # Then drop the supportTickets field entirely where the array became empty.
    clean_result = user_db.user.update_many(
        {'supportTickets': {'$size': 0}},
        {'$unset': {'supportTickets': ''}})
    if clean_result.matched_count:
        logging.info(
            'Removed empty support ticket list for %d users.', clean_result.modified_count)
def _send_focus_emails(
        action: 'campaign.NoGhostAction', dry_run_email: str,
        restricted_campaigns: Optional[Iterable[mailjet_templates.Id]] = None) -> None:
    """Send a focus (coaching) email to every user that is due for one.

    Args:
        action: what to do ('send', 'dry-run', ...); 'dry-run' stops after the
            first email and re-raises HTTP errors.
        dry_run_email: address receiving the emails in dry-run mode.
        restricted_campaigns: if set, only consider these campaign IDs.
    """
    database, users_database, eval_database = mongo.get_connections_from_env()
    instant = now.get()
    email_errors = 0
    counts = {
        campaign_id: 0
        for campaign_id in sorted(get_possible_campaigns(database, restricted_campaigns))
    }
    potential_users = users_database.user.find({
        'profile.email': {
            '$regex': re.compile(r'[^ ]+@[^ ]+\.[^ ]+'),
            # Exclude test addresses. Bug fix: the dot must be escaped,
            # otherwise any "@example<X>com" address would also be excluded.
            '$not': re.compile(r'@example\.com$'),
        },
        'projects': {'$elemMatch': {
            'isIncomplete': {'$ne': True},
        }},
        'profile.coachingEmailFrequency': {'$in': [
            email_pb2.EmailFrequency.Name(setting) for setting in _EMAIL_PERIOD_DAYS]},
        # Note that "not >" is not equivalent to "<=" in the case the field
        # is not defined: in that case we do want to select the user.
        'sendCoachingEmailAfter': {'$not': {'$gt': proto.datetime_to_json_string(instant)}},
    })
    restricted_campaigns_set: Optional[Set[mailjet_templates.Id]]
    if restricted_campaigns:
        restricted_campaigns_set = set(restricted_campaigns)
    else:
        restricted_campaigns_set = None
    for user_dict in potential_users:
        user_id = user_dict.pop('_id')
        user = proto.create_from_mongo(user_dict, user_pb2.User)
        user.user_id = str(user_id)
        try:
            campaign_id = send_focus_email_to_user(
                action, user, dry_run_email=dry_run_email, database=database,
                users_database=users_database, eval_database=eval_database, instant=instant,
                restricted_campaigns=restricted_campaigns_set)
        except requests.exceptions.HTTPError as error:
            if action == 'dry-run':
                raise
            # Keep going: one failed email should not abort the whole blast.
            logging.warning('Error while sending an email: %s', error)
            email_errors += 1
            continue
        if campaign_id:
            counts[campaign_id] += 1
            if action == 'dry-run':
                # In dry-run mode, one email is enough.
                break
            continue
    report_message = 'Focus emails sent today:\n' + '\n'.join([
        f' • *{campaign_id}*: {count} email{"s" if count > 1 else ""}'
        for campaign_id, count in counts.items()
    ])
    if action == 'send':
        report.notify_slack(report_message)
    logging.info(report_message)
def _compute_nps_report(users: Iterable[user_pb2.User], from_date: str, to_date: str) -> str:
    """Build a Slack-formatted NPS report for users that answered the survey.

    Args:
        users: users that answered the NPS survey in the period.
        from_date: start of the period (JSON timestamp string).
        to_date: end of the period (JSON timestamp string).

    Returns:
        A markdown report with the NPS, score distribution and comments.
    """
    score_distribution: dict[int, int] = collections.defaultdict(int)
    nps_total = 0
    num_users = 0
    responses_with_comment: list[Tuple[str, user_pb2.NPSSurveyResponse]] = []
    for user in users:
        num_users += 1
        response = user.net_promoter_score_survey_response
        score_distribution[response.score] += 1
        # TODO(pascal): Move that to a common library so that it's always
        # compute the same way.
        # NOTE(review): these thresholds (detractor <= 5, promoter > 7) differ
        # from the standard NPS buckets (detractor <= 6, promoter >= 9) —
        # confirm this is intentional before reusing.
        if response.score <= 5:
            nps_total -= 1
        elif response.score > 7:
            nps_total += 1
        if response.general_feedback_comment:
            responses_with_comment.append((user.user_id, response))
    user_db = mongo.get_connections_from_env().user_db
    # Total number of users that we asked for NPS during that time.
    total_num_users = user_db.user.count_documents({
        'featuresEnabled.excludeFromAnalytics': {'$ne': True},
        'emailsSent': {'$elemMatch': {
            'campaignId': 'nps',
            # Note that this is not taking the same base, as we are counting
            # users for which we sent an NPS during a given period, and then
            # those who answered during the same period.
            'sentAt': {
                '$gt': from_date,
                '$lt': to_date,
            },
        }},
    })

    def _display_func(
            id_and_response: Tuple[str, user_pb2.NPSSurveyResponse]) -> str:
        # One comment block per response, quoting each line of the comment.
        return f'[Score: {id_and_response[1].score}] ObjectId("{id_and_response[0]}")\n> ' + \
            ('\n> '.join(id_and_response[1].general_feedback_comment.split('\n')))
    comments = _report_comments(
        sorted(responses_with_comment, key=lambda r: -r[1].score), _display_func)
    answer_rate = round(num_users * 100 / total_num_users) if total_num_users else 0
    # NPS as a percentage with one decimal, e.g. 12.5.
    nps = round(nps_total * 1000 / num_users) / 10 if num_users else 0
    score_distributions = '\n'.join(
        f'*{score}*: {score_distribution[score]} user{_plural_s(score_distribution[score])}'
        for score in sorted(score_distribution.keys(), reverse=True))
    # TODO(emilie): Use "(out of n, xx% answer rate)" instead of "(out of n - xx% answer rate)"
    return f'{num_users} user{_plural_s(num_users)} answered the NPS survey ' \
        f'(out of {total_num_users} - {answer_rate}% answer rate) ' \
        f'for a global NPS of *{nps}%*\n' \
        f'{score_distributions}\n{comments}'
def save_user(user_data: user_pb2.User) -> user_pb2.User:
    """Save a user in the database."""
    # NOTE(review): this excerpt ends without a return statement although the
    # function is annotated `-> user_pb2.User`; the body presumably continues
    # beyond this view — confirm against the full file.
    unused_, users_database, unused_ = mongo.get_connections_from_env()
    users_database = users_database.with_prefix('jobflix_')
    collection = users_database.user
    if user_data.profile.email:
        # The walrus in the query assigns hashed_email before find_one runs,
        # so it is available in both branches below.
        if db_user := collection.find_one(
                {'hashedEmail': (hashed_email := auth.hash_user_email(user_data.profile.email))},
                {'_id': 1, 'projects': 1}):
            # Known email: merge the incoming projects into the stored ones,
            # keeping stored projects first and skipping duplicates by ID.
            user_data.user_id = str(db_user['_id'])
            new_projects = list(user_data.projects[:])
            user_data.ClearField('projects')
            user_data.projects.extend(
                proto.create_from_mongo(p, project_pb2.Project, always_create=True)
                for p in db_user.get('projects', []))
            old_project_ids = {p.project_id for p in user_data.projects}
            user_data.projects.extend(
                p for p in new_projects if _make_project_id(p) not in old_project_ids)
        elif user_data.user_id:
            # Email unknown but user already exists: attach the email to them.
            collection.update_one({'_id': objectid.ObjectId(user_data.user_id)}, {'$set': {
                'profile.email': user_data.profile.email,
                'hashedEmail': hashed_email,
            }})
def main(string_args: Optional[list[str]] = None) -> None:
    """Parse command line arguments and trigger the update_users_client_metrics function."""
    # Default upper bound: yesterday's date, so we only process complete days.
    yesterday = str((now.get() - datetime.timedelta(days=1)).date())
    parser = argparse.ArgumentParser(
        description='Synchronize MongoDB client metrics fields from Amplitude')
    parser.add_argument(
        '--registered-from',
        help='Consider only users who registered after this date.')
    parser.add_argument(
        '--registered-to', default=yesterday,
        help='Consider only users who registered before this date.')
    report.add_report_arguments(parser)
    args = parser.parse_args(string_args)
    if not report.setup_sentry_logging(args):
        return
    user_db = mongo.get_connections_from_env().user_db
    update_users_client_metrics(
        user_db.user,
        from_date=args.registered_from,
        to_date=args.registered_to,
        dry_run=args.dry_run)
def export_feedback(request: feedback_pb2.FeedbackExportRequest) -> flask.Response:
    """Export feedback.

    Builds a CSV of feedback entries whose ObjectId timestamp falls in the
    (after, before] window requested.
    """
    # NOTE(review): no return statement is visible in this excerpt although
    # the function is annotated `-> flask.Response`, and `before_time` from the
    # else branch is unused here; the body presumably continues beyond this view.
    if not request.HasField('after'):
        flask.abort(422, 'The parameter "after" is required.')
    after_time = request.after.ToDatetime()
    # Filter on _id: ObjectIds embed their creation time, so a range of
    # ObjectIds is a range of creation dates.
    id_filter = {
        '$gt': objectid.ObjectId.from_datetime(after_time),
    }
    if request.HasField('before'):
        before_time = request.before.ToDatetime()
        id_filter['$lte'] = objectid.ObjectId.from_datetime(before_time)
    else:
        before_time = datetime.datetime.utcnow()
    user_database = mongo.get_connections_from_env().user_db
    entries = user_database.feedbacks.find({'_id': id_filter})
    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(['date', 'source', 'text', 'score', 'user_id'])
    for feedback_entry in entries:
        entry_id = objectid.ObjectId(feedback_entry['_id'])
        writer.writerow((
            entry_id.generation_time.strftime('%Y-%m-%d'),
            feedback_entry.get('source', ''),
            feedback_entry.get('feedback', ''),
            # A zero score is exported as an empty cell, not "0".
            str(score) if (score := feedback_entry.get('score', 0)) else '',
            feedback_entry.get('userId', ''),
        ))
# TODO: "challengess" looks like a typo, but renaming would break callers.
def get_relevant_main_challengess(use_case: use_case_pb2.UseCase) \
        -> diagnostic_pb2.DiagnosticMainChallenges:
    """Analyse a use case for each main challenge, and return whether it is relevant in the case."""
    response = diagnostic_pb2.DiagnosticMainChallenges()
    stats_db = mongo.get_connections_from_env().stats_db
    relevance_pairs = diagnostic.set_main_challenges_relevance(
        use_case.user_data, database=stats_db)
    for challenge, _unused_relevance in relevance_pairs:
        response.categories.append(challenge)
    return response
def get_scoring_project(user_id: str, project_id: str) -> scoring.ScoringProject:
    """Get the scoring project or abort."""
    # Both lookups abort the request (via flask) when the ID is unknown.
    user_proto = get_user_data(user_id)
    project = get_project_data(user_proto, project_id)
    stats_db = mongo.get_connections_from_env().stats_db
    current_time = now.get()
    return scoring.ScoringProject(project, user_proto, stats_db, now=current_time)
def user_to_analytics_data(user: user_pb2.User) -> dict[str, Any]:
    """Gather analytics data to insert into elasticsearch."""
    # NOTE(review): this excerpt ends right after the inner helper definition;
    # the function presumably continues beyond this view (stats_db and
    # has_opened_strategy are unused in the visible part).
    stats_db = mongo.get_connections_from_env().stats_db
    has_opened_strategy = False
    data: dict[str, Any] = {
        'registeredAt': user.registered_at.ToJsonString(),
        # Uniform pseudo-random bucket in [0, 1) for A/B-style slicing.
        'randomGroup': random.randint(0, 100) / 100,
        'profile': {
            'ageGroup': age_group(user.profile.year_of_birth),
            'coachingEmailFrequency': email_pb2.EmailFrequency.Name(
                user.profile.coaching_email_frequency),
            # TODO(sil): Use more relevant names for gender fields.
            'customGender': user.profile.custom_gender,
            'familySituation': user_profile_pb2.FamilySituation.Name(
                user.profile.family_situation),
            'frustrations': [
                user_profile_pb2.Frustration.Name(f) for f in user.profile.frustrations
            ],
            'gender': user_profile_pb2.Gender.Name(user.profile.gender),
            'hasHandicap': user.profile.has_handicap,
            'highestDegree': _get_degree_level(user.profile.highest_degree),
            'isArmyVeteran': user.profile.is_army_veteran,
            'locale': user.profile.locale or 'fr',
            'origin': user_profile_pb2.UserOrigin.Name(user.profile.origin),
        },
        'featuresEnabled': json_format.MessageToDict(user.features_enabled),
        'origin': {
            'medium': user.origin.medium,
            'source': user.origin.source,
            'campaign': user.origin.campaign,
        },
        'hasAccount': user.has_account,
    }

    def _add_scored_challenge(name: str, challenge_id: Optional[str]) -> None:
        # Copy an existing challenge score under a friendlier key; no-op when
        # the challenge has no score yet.
        if not challenge_id:
            return
        if not (score := data.get('nps_response', {}).get(
                'challengeScores', {}).get(challenge_id)):
            return
        data['nps_response']['challengeScores'][name] = score
def get_available_areas() -> upskilling_pb2.Areas:
    """Return all the areas that have interesting data."""
    database = mongo.get_connections_from_env().stats_db
    # An area is available if it appears in either stats collection.
    area_ids = set()
    for collection in (database.best_jobs_in_area, database.departements):
        for doc in collection.find({}, {'_id': 1}):
            area_ids.add(doc.get('_id'))
    return upskilling_pb2.Areas(area_ids=sorted(area_ids))
def make_diagnostic_main_challenge_distribution(request: use_case_pb2.UseCasesDistributionRequest) \
        -> use_case_pb2.UseCaseDistribution:
    """See how use cases are distributed in the different diagnostic main challenges."""
    database, unused_, eval_db = mongo.get_connections_from_env()
    # Lazily deserialize the most recent automatically-created use cases,
    # newest first, capped by the request (or a default maximum).
    use_case_iterator = (
        proto.create_from_mongo(use_case_json, use_case_pb2.UseCase, 'use_case_id')
        for use_case_json in eval_db.use_case.find(
            {'_id': _AUTOMATIC_EVAL_USE_CASE_ID_REGEX}
        ).sort([('_id', -1)]).limit(request.max_use_cases or _MAX_SEARCHED_USE_CASES))
    return _make_diagnostic_main_challenge_distribution(
        use_case_iterator, database, request.categories)
def _compute_stars_report(users: Iterable[user_pb2.User], from_date: str, to_date: str) -> str:
    """Build a Slack-formatted report of star ratings given to projects.

    Args:
        users: users whose projects may carry feedback scores.
        from_date: start of the period (JSON timestamp string).
        to_date: end of the period (JSON timestamp string).

    Returns:
        A markdown report with the average stars, distribution and comments.
    """
    score_distribution: dict[int, int] = collections.defaultdict(int)
    stars_total = 0
    num_projects = 0
    responses_with_comment = []
    for user in users:
        for project in user.projects:
            feedback = project.feedback
            # Unrated projects (score 0) are ignored.
            if not feedback.score:
                continue
            num_projects += 1
            stars_total += feedback.score
            score_distribution[feedback.score] += 1
            if feedback.text:
                responses_with_comment.append(feedback)
    user_db = mongo.get_connections_from_env().user_db
    # Total number of finished projects during that time.
    total_num_projects = user_db.user.count_documents({
        'featuresEnabled.excludeFromAnalytics': {'$ne': True},
        'projects.diagnostic': {'$exists': True},
        'projects.createdAt': {
            '$gt': from_date,
            '$lt': to_date,
        }
    })
    answer_rate = round(num_projects * 100 / total_num_projects) if total_num_projects else 0
    # Average rounded to one decimal.
    average_stars = round(stars_total * 10 / num_projects) / 10 if num_projects else 0
    score_distributions = '\n'.join(
        f'{":star:" * score}: {score_distribution[score]} '
        f'project{_plural_s(score_distribution[score])}'
        for score in sorted(score_distribution.keys(), reverse=True))
    comments = _report_comments(
        sorted(responses_with_comment, key=lambda r: -r.score),
        lambda response: f'[{":star:" * response.score}]\n> ' + '\n> '.join(
            response.text.split('\n')),
    )
    return f'{num_projects} project{_plural_s(num_projects)} ' \
        f'{"was" if num_projects == 1 else "were"} scored in the app ' \
        f'(out of {total_num_projects} - {answer_rate}% answer rate) ' \
        f'for a global average of *{average_stars} :star:*\n' \
        f'{score_distributions}\n{comments}'
def export_user_to_elasticsearch(es_client: elasticsearch.Elasticsearch, index: str,
                                 registered_from: str, force_recreate: bool,
                                 dry_run: bool = True) -> None:
    """Synchronize users to elasticsearch for analytics purpose.

    Args:
        es_client: the Elasticsearch client to write to.
        index: name of the target index.
        registered_from: only export users registered strictly after this date.
        force_recreate: drop and recreate the index before exporting.
        dry_run: when True, compute documents but do not touch Elasticsearch.
    """
    user_db = mongo.get_connections_from_env().user_db
    if not dry_run:
        has_previous_index = es_client.indices.exists(index=index)
        if force_recreate and has_previous_index:
            logging.info('Removing old bobusers index ...')
            es_client.indices.delete(index=index)
        if force_recreate or not has_previous_index:
            logging.info('Creating bobusers index ...')
            es_client.indices.create(index=index)
    nb_users = 0
    nb_docs = 0
    cursor = user_db.user.find({
        'registeredAt': {'$gt': registered_from},
        'featuresEnabled.excludeFromAnalytics': {'$ne': True},
    })
    for row in cursor:
        nb_users += 1
        user = proto.create_from_mongo(row, user_pb2.User, 'user_id')
        data = user_to_analytics_data(user)
        logging.debug(data)
        if not dry_run:
            # TODO(cyrille): Consider using the noop feature to avoid re-computing inactive users
            # endlessly.
            # Upsert so re-runs refresh existing documents instead of failing.
            es_client.update(index=index, doc_type='_doc', id=user.user_id, body={
                'doc': data,
                'doc_as_upsert': True,
            })
            nb_docs += 1
            if nb_docs % 1000 == 0:
                logging.info('%i users processed', nb_docs)
    if not dry_run:
        es_client.indices.flush(index=index)
def _save_project(
        project: project_pb2.Project, unused_previous_project: project_pb2.Project,
        user_data: user_pb2.User) -> project_pb2.Project:
    """Finalize a Jobflix project: assign its ID, stamp it and send the welcome email.

    Returns the same project instance, mutated in place.
    """
    database, users_database, eval_database = mongo.get_connections_from_env()
    users_database = users_database.with_prefix('jobflix_')
    project.project_id = _make_project_id(project)
    if not project.HasField('created_at'):
        common_proto.set_date_now(project.created_at)
    # Only users that left an email get the welcome campaign.
    if user_data.profile.email:
        all_campaigns.send_campaign(
            'jobflix-welcome', user_data, action='send', database=database,
            users_database=users_database, eval_database=eval_database, now=now.get())
    _give_coaching_feedback(user_data.user_id, user_data.profile.email, project)
    return project
def get_sections_for_project(user_proto: user_pb2.User) -> upskilling_pb2.Sections:
    """Return all the sections to browse.

    Runs every configured section generator against the user's first project
    and keeps only sections with at least 2 jobs. Aborts with 422 when the
    user has no project.
    """
    if not user_proto.projects:
        flask.abort(422, i18n.flask_translate("Il n'y a pas de projet à explorer."))
    project = user_proto.projects[0]
    database = mongo.get_connections_from_env().stats_db
    scoring_project = scoring.ScoringProject(project, user_proto, database)
    result = upskilling_pb2.Sections()
    good_jobs = jobs.get_all_good_job_group_ids(scoring_project.database)
    # Job groups that pay well in the user's area, used to tag job perks.
    best_salaries = {
        job.job_group.rome_id
        for job in _get_best_jobs_in_area(scoring_project).best_salaries_jobs}
    slots = list(_SECTION_SLOTS.get_collection(database))
    are_all_jobs_hiring = _get_are_all_jobs_hiring()
    for section in slots:
        if section.is_for_alpha_only and not user_proto.features_enabled.alpha:
            continue
        generator_id = section.generator
        try:
            generator = _SECTION_GENERATORS[generator_id]
        except KeyError:
            # A misconfigured slot should not break the whole response.
            logging.error('Unknown upskilling section generator "%s"', generator_id)
            continue
        computed_section = generator.get_jobs(
            scoring_project=scoring_project, allowed_job_ids=good_jobs,
            # Tell the generator which of its sections were already produced,
            # so it can avoid duplicates.
            previous_sections={
                section.id for section in result.sections
                if section.state.startswith(f'{generator_id}:')
            })
        # Sections with fewer than 2 jobs are not worth showing.
        if not computed_section or len(computed_section.jobs) < 2:
            continue
        result.sections.add(
            id=computed_section.new_id or generator_id,
            state=f'{generator_id}:{computed_section.state or ""}',
            name=scoring_project.populate_template(scoring_project.translate_key_string(
                f'jobflix_sections:{computed_section.new_id or generator_id}',
                hint=computed_section.new_name or generator.name,
                context=_get_bob_deployment(), is_hint_static=True)),
            jobs=[
                _add_perks_to_job(job, best_salaries, is_hiring=are_all_jobs_hiring)
                for job in computed_section.jobs],
        )
    return result
def create_use_case(request: use_case_pb2.UseCaseCreateRequest, requester_email: str) \
        -> use_case_pb2.UseCase:
    """Create a use case from a user.

    The user is located by email, support ticket ID or user ID (whichever the
    request sets). When a pool name is given, the anonymized use case is also
    persisted with the next free index in that pool.
    """
    unused_, user_database, database = mongo.get_connections_from_env()
    identifier = request.WhichOneof('identifier')
    if not identifier:
        flask.abort(400, "Il manque un identifiant pour créer le cas d'usage.")
    query: dict[str, Any]
    if request.email:
        # Accessing a user by email is sensitive: keep an audit trail.
        _log_request(request.email, requester_email, database)
        query = {'hashedEmail': auth.hash_user_email(request.email)}
    elif request.ticket_id:
        query = {'supportTickets.ticketId': request.ticket_id}
    else:
        query = {'_id': objectid.ObjectId(request.user_id)}
    # Find user.
    user_dict = user_database.user.find_one(query)
    if not user_dict:
        flask.abort(
            404,
            f'Aucun utilisateur avec l\'identifiant "{getattr(request, identifier)}" '
            f"({identifier}) n\'a été trouvé.")
    # Find next free index in use case pool.
    last_use_case_in_pool = database.use_case.find(
        {'poolName': request.pool_name}, {'_id': 0, 'indexInPool': 1},
    ).sort('indexInPool', pymongo.DESCENDING).limit(1)
    next_index = next((u.get('indexInPool', 0) for u in last_use_case_in_pool), -1) + 1
    # Convert user to use case.
    use_case_proto = privacy.user_to_use_case(user_dict, request.pool_name, next_index)
    if not use_case_proto:
        flask.abort(500, 'Impossible to read user data.')
    if not request.pool_name:
        return use_case_proto
    # Save use case.
    use_case = json_format.MessageToDict(use_case_proto)
    use_case['_id'] = use_case.pop('useCaseId')
    database.use_case.insert_one(use_case)
    return use_case_proto
def setUp(self) -> None:
    """Patch the environment and MongoDB before each test."""
    super().setUp()
    self.addCleanup(cache.clear)
    # Start every patcher and register its teardown, in order, so the
    # environment points at the mongomock servers below.
    patchers = [
        mock.patch.dict(
            os.environ, {
                'MONGO_URL': 'mongodb://mydata.com/test',
                'USERS_MONGO_URL': 'mongodb://my-database/test',
            }),
        mongomock.patch(
            (('my-database', 27017), ('mydata.com', 27017))),
    ]
    for patcher in patchers:
        patcher.start()
        self.addCleanup(patcher.stop)
    self._stats_db, self._user_db, self._eval_db = mongo.get_connections_from_env()
def delete_user(*, user_id: str) -> Tuple[str, int]:
    """Delete a user from their internal ID.

    Returns a (message, HTTP status) tuple: 202 when the deletion request was
    accepted, 404 when the user could not be matched.
    """
    user_db = mongo.get_connections_from_env().user_db.with_prefix('jobflix_')
    # The auth token may come either as a Bearer header or a ?token= parameter.
    auth_token = flask.request.headers.get('Authorization', '').replace('Bearer ', '') or \
        flask.request.args.get('token', '')
    user_data = user_pb2.User(user_id=user_id)
    user_data.profile.email = flask.request.args.get('email', '')
    deleted_id = user.delete_user(user_data, auth_token, user_db=user_db)
    if not deleted_id:
        return i18n.flask_translate(
            "Nous n'avons pas trouvé votre email dans notre base de données.\n"
            'Si vous ne vous étiez pas encore désabonné·e, '
            'contactez nous à [email protected] pour vous assurer de votre désinscription.'), 404
    return i18n.flask_translate(
        'Votre requête a été prise en compte.\n'
        'Votre adresse email sera supprimée de la base Jobflix dans les prochaines 24 heures.'), 202
def _compute_agreement_report(users: Iterable[user_pb2.User], from_date: str, to_date: str) -> str:
    """Build a Slack-formatted report of challenge-agreement scores.

    Args:
        users: users whose projects may carry a challenge agreement score.
        from_date: start of the period (JSON timestamp string).
        to_date: end of the period (JSON timestamp string).

    Returns:
        A markdown report with the average agreement (0-4) and distribution.
    """
    score_distribution: dict[int, int] = collections.defaultdict(int)
    agreement_total = 0
    num_projects = 0
    for user in users:
        for project in user.projects:
            feedback = project.feedback
            if not feedback.challenge_agreement_score:
                continue
            num_projects += 1
            # Stored scores are 1-5; shift to a 0-4 scale for reporting.
            agreement_total += feedback.challenge_agreement_score - 1
            score_distribution[feedback.challenge_agreement_score - 1] += 1
    user_db = mongo.get_connections_from_env().user_db
    # Total number of finished projects during that time.
    total_num_projects = user_db.user.count_documents({
        'featuresEnabled.excludeFromAnalytics': {'$ne': True},
        'projects.diagnostic': {'$exists': True},
        'projects.createdAt': {
            '$gt': from_date,
            '$lt': to_date,
        }
    })
    answer_rate = round(num_projects * 100 / total_num_projects) if total_num_projects else 0
    average_agreement = round(agreement_total * 10 / num_projects) / 10 if num_projects else 0
    score_distributions = '\n'.join(
        f'{score}/4: {score_distribution[score]} '
        f'project{_plural_s(score_distribution[score])}'
        for score in sorted(score_distribution.keys(), reverse=True))
    return f'{num_projects} project challenge{_plural_s(num_projects)} ' \
        f'{"was" if num_projects == 1 else "were"} evaluated in the app ' \
        f'(out of {total_num_projects} - {answer_rate}% answer rate) ' \
        f'for a global average agreement of *{average_agreement}/4*\n' \
        f'{score_distributions}'
def _send_focus_emails(action: 'campaign.Action', dry_run_email: str) -> None:
    """Send a focus (coaching) email to every user that is due for one.

    Args:
        action: what to do ('send', 'dry-run', ...); 'dry-run' re-raises
            HTTP errors instead of counting them.
        dry_run_email: address receiving the emails in dry-run mode.
    """
    database, users_database, unused_eval_database = mongo.get_connections_from_env()
    instant = now.get()
    email_errors = 0
    counts = {campaign_id: 0 for campaign_id in _FOCUS_CAMPAIGNS}
    potential_users = users_database.user.find({
        # Loose email sanity check; stricter validation happens downstream.
        'profile.email': re.compile('.+@.+'),
        'projects': {'$elemMatch': {
            'isIncomplete': {'$ne': True},
        }},
        'profile.coachingEmailFrequency': {'$in': [
            user_pb2.EmailFrequency.Name(setting) for setting in _EMAIL_PERIOD_DAYS]},
        # Note that "not >" is not equivalent to "<=" in the case the field
        # is not defined: in that case we do want to select the user.
        'sendCoachingEmailAfter': {'$not': {'$gt': proto.datetime_to_json_string(instant)}},
    })
    for user_dict in potential_users:
        user_id = user_dict.pop('_id')
        user = typing.cast(user_pb2.User, proto.create_from_mongo(user_dict, user_pb2.User))
        user.user_id = str(user_id)
        try:
            campaign_id = send_focus_email_to_user(
                action, user, dry_run_email=dry_run_email, database=database,
                users_database=users_database, instant=instant)
        except requests.exceptions.HTTPError as error:
            if action == 'dry-run':
                raise
            # Keep going: one failed email should not abort the whole blast.
            logging.warning('Error while sending an email: %s', error)
            email_errors += 1
            continue
        if campaign_id:
            counts[campaign_id] += 1
            continue
    report_message = 'Focus emails sent:\n' + '\n'.join([
        f' • *{campaign_id}*: {count} email{"s" if count > 1 else ""}'
        for campaign_id, count in counts.items()
    ])
    if action == 'send':
        report.notify_slack(report_message)
    logging.info(report_message)
def _compute_and_send_report(report_id: str, from_date: str, to_date: str, out: TextIO,
                             dry_run: bool = True) -> None:
    """Compute one of the _REPORTS and either print it or post it to Slack.

    Args:
        report_id: key into _REPORTS selecting which report to compute.
        from_date: start of the period (JSON timestamp string).
        to_date: end of the period; defaults to now when empty.
        out: stream receiving the report text in dry-run mode.
        dry_run: when True, write to `out` instead of posting to Slack.
    """
    if not to_date:
        to_date = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M')
    report = _REPORTS[report_id]
    user_db = mongo.get_connections_from_env().user_db
    # Merge the report's own filters with the common period/analytics filters.
    selected_users = user_db.user.find(
        report.mongo_filters | {
            'featuresEnabled.excludeFromAnalytics': {'$ne': True},
            'registeredAt': {'$lt': to_date},
            report.timestamp_field: {
                '$gt': from_date,
                '$lt': to_date,
            }
        },
        # Only fetch the fields this report actually needs.
        {field: 1 for field in report.required_fields})
    report_text = report.compute_report(
        (_create_user_proto_with_user_id(user) for user in selected_users),
        from_date=from_date, to_date=to_date)
    if dry_run:
        out.write(report_text)
        return
    if _SLACK_FEEDBACK_URL:
        requests.post(_SLACK_FEEDBACK_URL, json={
            'attachments': [{
                'color': report.color,
                'mrkdwn_in': ['text'],
                'title': f'{report.title} from {from_date} to {to_date}',
                'text': report_text,
            }]
        })
def main(now: datetime.datetime, days_before_sending: str) -> None:
    """Send an email to users that signed up more than n days ago list of users."""
    # NOTE(review): `errors` is declared but never appended to in this
    # excerpt, so _send_reports always receives an empty list — confirm
    # whether error collection was intended here.
    stats_db, user_db, unused_eval_db = mongo.get_connections_from_env()
    query = dict(_NPS_CAMPAIGN.mongo_filters, **{
        'profile.email': re.compile('@'),
    })
    count = 0
    # Only fetch the fields the campaign needs to render the email.
    user_iterator: Iterator[Dict[str, Any]] = user_db.user.find(
        query,
        (
            '_id',
            'registeredAt',
            'emailsSent',
            'profile.email',
            'profile.lastName',
            'profile.locale',
            'profile.name',
        ))
    errors: List[str] = []
    # Users must have registered before this cut-off to receive the email.
    registered_before = (now - datetime.timedelta(days=int(days_before_sending)))\
        .replace(hour=_DAY_CUT_UTC_HOUR, minute=0, second=0, microsecond=0)
    for user_in_db in user_iterator:
        user = user_pb2.User()
        user_id = user_in_db.pop('_id')
        json_format.ParseDict(user_in_db, user)
        user.user_id = str(user_id)
        if user.registered_at.ToDatetime() > registered_before:
            # Skip silently: will send another day.
            continue
        if not _NPS_CAMPAIGN.send_mail(
                _CAMPAIGN_ID, user, database=stats_db, users_database=user_db,
                action='dry-run' if DRY_RUN else 'send', now=now):
            continue
        count += 1
    _send_reports(count, errors)
def _send_activation_email(user: user_pb2.User, project: project_pb2.Project) -> None:
    """Send an email to the user just after we have defined their diagnosis."""
    database, users_database, eval_database = mongo.get_connections_from_env()
    # The campaign renders the user's first project, so make sure that slot
    # holds the project we are activating — on a copy, to avoid mutating the
    # caller's message.
    if not user.projects or user.projects[0] != project:
        patched_user = user_pb2.User()
        patched_user.CopyFrom(user)
        if not patched_user.projects:
            patched_user.projects.add()
        patched_user.projects[0].CopyFrom(project)
        user = patched_user
    all_campaigns.send_campaign(
        'activation-email', user, action='send',
        database=database, users_database=users_database,
        eval_database=eval_database, now=now.get())
def get_more_jobs(
        user_proto: user_pb2.User, *, section_id: str, state: str) -> upskilling_pb2.Section:
    """Return more jobs for a given section.

    Args:
        user_proto: the user; their first project drives the scoring.
        section_id: ID of the section to extend.
        state: opaque pagination state, formatted as "generator_id:state".

    Aborts with 422 on missing project or malformed state, 404 on unknown
    generator.
    """
    if not user_proto.projects:
        flask.abort(422, i18n.flask_translate("Il n'y a pas de projet à explorer."))
    try:
        # The state encodes which generator produced the section.
        generator_id, section_state = state.split(':', 1)
    except ValueError:
        flask.abort(
            422,
            i18n.flask_translate("Le paramètre d'état {state} n'a pas le bon format.")
            .format(state=state))
    project = user_proto.projects[0]
    database = mongo.get_connections_from_env().stats_db
    scoring_project = scoring.ScoringProject(project, user_proto, database)
    try:
        generator = _SECTION_GENERATORS[generator_id]
    except KeyError:
        flask.abort(
            404,
            i18n.flask_translate('Générateur de section inconnu: {generator_id}')
            .format(generator_id=generator_id))
    try:
        section = generator.get_more_jobs(
            scoring_project=scoring_project, section_id=section_id, state=section_state)
    except _InvalidState:
        flask.abort(
            422,
            i18n.flask_translate('Impossible de commencer à {start_from}')
            .format(start_from=section_state))
    best_jobs_in_area = _get_best_jobs_in_area(scoring_project)
    are_all_jobs_hiring = _get_are_all_jobs_hiring()
    # Job groups that pay well in the user's area, used to tag job perks.
    best_salaries = {
        job.job_group.rome_id for job in best_jobs_in_area.best_salaries_jobs}
    for job in section.jobs:
        _add_perks_to_job(job, best_salaries, is_hiring=are_all_jobs_hiring)
    return section
def get_challenge_actions() -> dict[str, dict[str, float]]:
    """Compute the score of each action for a given challenge.

    Returns a dict whose keys are challenges, and values are dicts of
    action -> normalized score. Normalization is so that total score for a
    given challenge is always 1.
    """
    stats_db = mongo.get_connections_from_env().stats_db
    challenges: dict[str, dict[str, float]] = collections.defaultdict(dict)
    # First pass: collect raw scores per challenge.
    for action in _CHALLENGE_ACTIONS.get_collection(stats_db):
        for challenge_id, raw_score in action.score_by_challenge.items():
            challenges[challenge_id][action.action_id] = raw_score
    # Second pass: normalize each challenge so its scores sum to 1. Iterate a
    # snapshot of the keys since values are replaced along the way.
    for challenge_id in list(challenges):
        action_scores = challenges[challenge_id]
        total_score = sum(action_scores.values())
        challenges[challenge_id] = {
            action_id: raw_score / total_score
            for action_id, raw_score in action_scores.items()
        }
    return challenges
def _save_project(project: project_pb2.Project, previous_project: project_pb2.Project,
                  user_data: user_pb2.User) -> project_pb2.Project:
    """Run the full processing pipeline on a saved project (stats, diagnostic,
    advice, strategies) and return the mutated project.

    `previous_project` is the stored version, used to detect what changed.
    """
    # TODO(cyrille): Check for completeness here, rather than in client.
    if project.is_incomplete:
        return project
    tick.tick('Process project start')
    rome_id = project.target_job.job_group.rome_id
    departement_id = project.city.departement_id
    if not project.project_id:
        # Add ID, timestamp and stats to new projects
        project.project_id = _create_new_project_id(user_data)
        common_proto.set_date_now(project.created_at)
    database = mongo.get_connections_from_env().stats_db
    tick.tick('Populate local stats')
    # Local stats are tied to (departement, job group): recompute on change.
    if previous_project.city.departement_id != departement_id or \
            previous_project.target_job.job_group.rome_id != rome_id:
        project.ClearField('local_stats')
    if not project.HasField('local_stats'):
        project.local_stats.CopyFrom(
            jobs.get_local_stats(database, departement_id, rome_id))
    tick.tick('Diagnostic')
    diagnostic.maybe_diagnose(user_data, project, database)
    tick.tick('Advisor')
    advisor.maybe_advise(user_data, project, database)
    tick.tick('Strategies')
    strategist.maybe_strategize(user_data, project, database)
    tick.tick('New feedback')
    # Only forward feedback the first time it appears on the project.
    if project.feedback.text and not previous_project.feedback.text:
        give_project_feedback(user_data.user_id, '@' in user_data.profile.email, project)
    tick.tick('Process project end')
    return project
def main(string_args: Optional[list[str]] = None) -> None:
    """Parse command line arguments and trigger the clean_guest_users function."""
    arg_parser = argparse.ArgumentParser(
        description='Clean guests and inactive users from the database.')
    arg_parser.add_argument(
        '--max-users', type=int,
        help='Only consider a maximum of this number of users.')
    report.add_report_arguments(arg_parser)
    parsed_args = arg_parser.parse_args(string_args)
    if not report.setup_sentry_logging(parsed_args):
        return
    user_db = mongo.get_connections_from_env().user_db
    num_cleaned, num_marked, num_errors = clean_users(
        user_db, parsed_args.dry_run, parsed_args.max_users)
    logging.info(
        'Cleaned %d users, set check date for %d users and got %d errors',
        num_cleaned, num_marked, num_errors)
def main(pool_name: str = _YESTERDAY, users_json_filters: str = '', limit: str = '20') -> None:
    """Create a pool of use cases and store them in MongoDB.

    Args:
        pool_name: name of the pool to (re)create.
        users_json_filters: JSON-encoded Mongo filter selecting source users.
        limit: max number of users to convert (string, parsed as int).
    """
    stats_db, user_db, eval_db = mongo.get_connections_from_env()
    user_counts = diagnostic.get_users_counts(stats_db)
    users_filters = json.loads(
        users_json_filters) if users_json_filters else _DEFAULT_USERS_FILTER
    user_iterator = user_db.user.find(users_filters).limit(int(limit))
    num_cases = 0
    for user_index, user_dict in enumerate(user_iterator):
        use_case_proto = privacy.user_to_use_case(user_dict, pool_name, user_index)
        if not use_case_proto:
            continue
        # Anonymize further by replacing the name with a common first name.
        if user_counts and user_counts.frequent_firstnames:
            use_case_proto.user_data.profile.name = _pick_random(
                user_counts.frequent_firstnames)
        use_case = json_format.MessageToDict(use_case_proto)
        use_case['_id'] = use_case.pop('useCaseId')
        try:
            eval_db.use_case.insert_one(use_case)
        except errors.DuplicateKeyError:
            # Re-running for the same pool overwrites existing use cases.
            eval_db.use_case.replace_one({'_id': use_case['_id']}, use_case)
        num_cases += 1
    if num_cases and _SLACK_CREATE_POOL_URL:
        pool_url = campaign.get_bob_link(f'/eval/{parse.quote(pool_name)}')
        requests.post(
            _SLACK_CREATE_POOL_URL,
            json={
                'text':
                    f'A new use cases pool is ready for evaluation: <{pool_url}|{pool_name}>',
            })
"""A script to count users in each departement and rome group.""" import collections from bob_emploi.frontend.server import mongo from bob_emploi.frontend.server import now _, _DB = mongo.get_connections_from_env() def main(): """Aggregate users and populate user_count collection.""" aggregation = _DB.user.aggregate([{ '$match': { 'featuresEnabled.excludeFromAnalytics': { '$ne': True }, } }, { '$unwind': '$projects' }, { '$project': { '_id': 0, 'dep_id': '$projects.mobility.city.departementId', 'rome_id': '$projects.targetJob.jobGroup.romeId', } }]) job_group_counts = collections.defaultdict(int) dep_counts = collections.defaultdict(int)
def main(string_args: Optional[list[str]] = None) -> None:
    """Check the status of sent emails on MailJet and update our Database. """
    parser = argparse.ArgumentParser(
        description='Update email status on sent emails.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    report.add_report_arguments(parser)
    parser.add_argument(
        '--campaigns', choices=mail_blast.campaign.list_all_campaigns(), nargs='*',
        help='Campaign IDs to check. If not specified, run for all campaigns.')
    parser.add_argument(
        '--mongo-collection', default='user',
        help='Name of the mongo collection to update.')
    args = parser.parse_args(string_args)
    if not report.setup_sentry_logging(args):
        return
    # Base filter shared by all three $or branches below.
    email_mongo_filter = {
        'mailjetMessageId': {'$exists': True},
    }
    if args.campaigns:
        email_mongo_filter['campaignId'] = {'$in': args.campaigns}
    yesterday = proto.datetime_to_json_string(now.get() - datetime.timedelta(days=1))
    mongo_filter = {
        '$or': [
            # Emails that we've never checked.
            {
                'emailsSent': {
                    '$elemMatch': dict({
                        'lastStatusCheckedAt': {'$exists': False},
                    }, **email_mongo_filter),
                },
            },
            # Emails checked less than two weeks after they have been sent and
            # that we haven't checked today.
            {
                'emailsSent': {
                    '$elemMatch': dict(
                        {
                            'lastStatusCheckedAt': {'$lt': yesterday},
                            'lastStatusCheckedAfterDays': {'$not': {'$gte': 14}},
                        }, **email_mongo_filter),
                },
            },
            # Emails sent less than 24 hours ago.
            {
                'emailsSent': {
                    '$elemMatch': dict({
                        'sentAt': {'$gt': yesterday},
                    }, **email_mongo_filter),
                },
            },
        ],
    }
    user_db = mongo.get_connections_from_env().user_db
    mongo_collection = user_db.get_collection(args.mongo_collection)
    selected_users = mongo_collection.find(mongo_filter, {'emailsSent': 1})
    treated_users = 0
    # TODO(cyrille): Make sure errors are logged to sentry.
    # TODO(cyrille): If it fails on a specific user, keep going.
    for user in selected_users:
        emails_sent = user.get('emailsSent', [])
        # Rewrite the full emailsSent array with refreshed statuses.
        updated_emails_sent = [
            _update_email_sent_status(email, yesterday, campaign_ids=args.campaigns)
            for email in emails_sent
        ]
        mongo_collection.update_one(
            {'_id': user['_id']},
            {'$set': {'emailsSent': updated_emails_sent}})
        treated_users += 1
        if not treated_users % 100:
            logging.info('Treated %d users', treated_users)