def track_periodic_data():
    """
    Push per-user usage properties to Hubspot/Kissmetrics.

    Looks up the web users whose last login falls within the past 180 days,
    and for each one that has an email works out the largest form count and
    the largest mobile-user count found among the user's domains. The
    collected records are serialized to a single JSON string and handed to
    ``submit_data_to_hub_and_kiss``.

    :return: None
    """
    cutoff = date.today() - timedelta(days=180)

    # One dict per web user, restricted to the 'domains' and 'email' fields
    recent_web_users = UserES().web_users().last_logged_in(gte=cutoff)
    web_users = recent_web_users.fields(['domains', 'email']).run().hits

    # Lookups keyed by domain, built from terms aggregations on 'domain'
    form_counts = (
        FormES().terms_aggregation('domain', 'domain').size(0).run()
        .aggregations.domain.counts_by_bucket()
    )
    worker_counts = (
        UserES().mobile_users().terms_aggregation('domain', 'domain').size(0).run()
        .aggregations.domain.counts_by_bucket()
    )

    payload = []
    for web_user in web_users:
        email = web_user.get('email')
        if not email:
            # Records are keyed by email, so users without one are skipped
            continue

        # Largest value seen across this user's domains; stays 0 when none match
        top_forms = 0
        top_workers = 0
        for name in web_user['domains']:
            if name in form_counts:
                top_forms = max(top_forms, form_counts[name])
            if name in worker_counts:
                top_workers = max(top_workers, worker_counts[name])

        created_spaces = ", ".join(get_domains_created_by_user(email))
        property_values = [
            ('max_form_submissions_in_a_domain', top_forms),
            ('max_mobile_workers_in_a_domain', top_workers),
            ('project_spaces_created_by_user', created_spaces),
            ('over_300_form_submissions', top_forms > 300),
        ]
        payload.append({
            'email': email,
            'properties': [
                {'property': key, 'value': value}
                for key, value in property_values
            ],
        })

    submit_data_to_hub_and_kiss(json.dumps(payload))
def track_periodic_data():
    """
    Sync data that is neither event nor page based with Hubspot/Kissmetrics,
    recording timestamps for each stage along the way.

    For every web user with an email whose last login is within the past 180
    days, computes the largest form count and the largest mobile-user count
    among the user's domains and submits all records as one JSON string via
    ``submit_data_to_hub_and_kiss``. Before submitting, a soft assert is
    evaluated on the condition that building the payload took less than 100
    seconds; its message carries the recorded timestamps and the size of the
    JSON string.

    :return: None
    """
    start_time = datetime.now()

    # Start by getting a list of web users mapped to their domains
    six_months_ago = date.today() - timedelta(days=180)
    users_to_domains = UserES().web_users().last_logged_in(gte=six_months_ago).fields(['domains', 'email'])\
        .run().hits
    # users_to_domains is a list of dicts
    time_users_to_domains_query = datetime.now()

    domains_to_forms = FormES().terms_aggregation('domain', 'domain').size(0).run()\
        .aggregations.domain.counts_by_bucket()
    time_domains_to_forms_query = datetime.now()

    domains_to_mobile_users = UserES().mobile_users().terms_aggregation('domain', 'domain').size(0).run()\
        .aggregations.domain.counts_by_bucket()
    time_domains_to_mobile_users_query = datetime.now()

    # For each web user, iterate through their domains and select the max number of form submissions and
    # max number of mobile workers
    submit = []
    for user in users_to_domains:
        email = user.get('email')
        if not email:
            continue
        max_forms = 0
        max_workers = 0

        for domain in user['domains']:
            if domain in domains_to_forms and domains_to_forms[domain] > max_forms:
                max_forms = domains_to_forms[domain]
            if domain in domains_to_mobile_users and domains_to_mobile_users[domain] > max_workers:
                max_workers = domains_to_mobile_users[domain]

        project_spaces_created = ", ".join(get_domains_created_by_user(email))

        user_json = {
            'email': email,
            'properties': [
                {
                    'property': 'max_form_submissions_in_a_domain',
                    'value': max_forms
                },
                {
                    'property': 'max_mobile_workers_in_a_domain',
                    'value': max_workers
                },
                {
                    'property': 'project_spaces_created_by_user',
                    'value': project_spaces_created,
                },
                {
                    'property': 'over_300_form_submissions',
                    'value': max_forms > 300
                }
            ]
        }
        submit.append(user_json)

    # Taken before serialization, so the measured time excludes json.dumps
    end_time = datetime.now()
    submit_json = json.dumps(submit)

    processing_time = end_time - start_time
    _soft_assert = soft_assert('{}@{}'.format('tsheffels', 'dimagi.com'))
    # TODO: Update this soft assert to only trigger if the timing is longer than a threshold
    # NOTE: sys.getsizeof reports the in-memory size of the str object
    # (including object overhead), not the encoded length of the payload.
    msg = 'Periodic Data Timing: start: {}, users_to_domains: {}, domains_to_forms: {}, ' \
          'domains_to_mobile_workers: {}, end: {}, size of string post to hubspot (bytes): {}'\
        .format(
            start_time,
            time_users_to_domains_query,
            time_domains_to_forms_query,
            time_domains_to_mobile_users_query,
            end_time,
            sys.getsizeof(submit_json)
        )
    # total_seconds() measures the full duration. The ``seconds`` attribute
    # only holds the sub-day remainder, so a run lasting a day or more could
    # wrongly pass the check.
    _soft_assert(processing_time.total_seconds() < 100, msg)

    submit_data_to_hub_and_kiss(submit_json)
def track_periodic_data():
    """
    Push per-user usage properties to Hubspot/Kissmetrics and report gauges.

    Selects web users whose last login is within the past 180 days and who
    pass the query's ``analytics_enabled()`` filter. For each user with a
    valid email, the largest form count and largest mobile-user count among
    the user's domains are computed and added to a batch, which is submitted
    as one JSON string. Afterwards two datadog gauges are updated: the number
    of users included, and the number of domains whose form count exceeds
    ``HUBSPOT_THRESHOLD``.

    :return: None
    """
    cutoff = date.today() - timedelta(days=180)

    # One dict per web user with 'domains', 'email' and 'date_joined'
    web_users = (
        UserES()
        .web_users()
        .last_logged_in(gte=cutoff)
        .fields(['domains', 'email', 'date_joined'])
        .analytics_enabled()
        .run()
        .hits
    )

    # Lookups keyed by domain, built from terms aggregations on 'domain'
    form_counts = (
        FormES().terms_aggregation('domain', 'domain').size(0).run()
        .aggregations.domain.counts_by_bucket()
    )
    worker_counts = (
        UserES().mobile_users().terms_aggregation('domain', 'domain').size(0).run()
        .aggregations.domain.counts_by_bucket()
    )

    # Property names are prefixed with the instance string so that data from
    # different environments (india / www) is kept apart
    prefix = get_instance_string()

    # Number of domains whose form count is above HUBSPOT_THRESHOLD
    domains_over_threshold = sum(
        1 for count in form_counts.values() if count > HUBSPOT_THRESHOLD
    )

    payload = []
    for web_user in web_users:
        email = web_user.get('email')
        if not _email_is_valid(email):
            continue

        joined_on = web_user.get('date_joined')

        # Largest value seen across this user's domains; stays 0 when none match
        top_forms = 0
        top_workers = 0
        for name in web_user['domains']:
            if name in form_counts:
                top_forms = max(top_forms, form_counts[name])
            if name in worker_counts:
                top_workers = max(top_workers, worker_counts[name])

        created_spaces = ", ".join(get_domains_created_by_user(email))
        property_values = [
            ('{}max_form_submissions_in_a_domain'.format(prefix), top_forms),
            ('{}max_mobile_workers_in_a_domain'.format(prefix), top_workers),
            ('{}project_spaces_created_by_user'.format(prefix), created_spaces),
            ('{}over_300_form_submissions'.format(prefix), top_forms > HUBSPOT_THRESHOLD),
            ('{}date_created'.format(prefix), joined_on),
        ]
        payload.append({
            'email': email,
            'properties': [
                {'property': key, 'value': value}
                for key, value in property_values
            ],
        })

    submit_data_to_hub_and_kiss(json.dumps(payload))

    # Every user with a valid email contributes exactly one payload entry,
    # so the payload length is the number of users processed
    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE: len(payload),
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE: domains_over_threshold,
    })
def track_periodic_data():
    """
    Push per-user usage properties to Hubspot/Kissmetrics.

    For each web user with an email whose last login is within the past 180
    days, determines the largest form count and the largest mobile-user count
    among the user's domains, and records those together with the projects
    the user created and the user's ``date_joined`` value. Property names are
    prefixed with the instance string. All records are submitted as one JSON
    string through ``submit_data_to_hub_and_kiss``.

    :return: None
    """
    def largest_count(counts_by_domain, domain_names):
        # Highest value in counts_by_domain over the given domains; 0 when
        # none of the domains appear in the lookup
        best = 0
        for name in domain_names:
            if name in counts_by_domain and counts_by_domain[name] > best:
                best = counts_by_domain[name]
        return best

    cutoff = date.today() - timedelta(days=180)

    # One dict per web user with 'domains', 'email' and 'date_joined'
    web_users = (
        UserES()
        .web_users()
        .last_logged_in(gte=cutoff)
        .fields(['domains', 'email', 'date_joined'])
        .run()
        .hits
    )

    # Lookups keyed by domain, built from terms aggregations on 'domain'
    form_counts = (
        FormES().terms_aggregation('domain', 'domain').size(0).run()
        .aggregations.domain.counts_by_bucket()
    )
    worker_counts = (
        UserES().mobile_users().terms_aggregation('domain', 'domain').size(0).run()
        .aggregations.domain.counts_by_bucket()
    )

    # Prefix that keeps india and www data separate
    prefix = get_instance_string()

    payload = []
    for web_user in web_users:
        email = web_user.get('email')
        if not email:
            continue

        joined_on = web_user.get('date_joined')
        user_domains = web_user['domains']
        top_forms = largest_count(form_counts, user_domains)
        top_workers = largest_count(worker_counts, user_domains)
        created_spaces = ", ".join(get_domains_created_by_user(email))

        properties = [
            {
                'property': '{}max_form_submissions_in_a_domain'.format(prefix),
                'value': top_forms,
            },
            {
                'property': '{}max_mobile_workers_in_a_domain'.format(prefix),
                'value': top_workers,
            },
            {
                'property': '{}project_spaces_created_by_user'.format(prefix),
                'value': created_spaces,
            },
            {
                'property': '{}over_300_form_submissions'.format(prefix),
                'value': top_forms > 300,
            },
            {
                'property': '{}date_created'.format(prefix),
                'value': joined_on,
            },
        ]
        payload.append({'email': email, 'properties': properties})

    submit_data_to_hub_and_kiss(json.dumps(payload))
def track_periodic_data():
    """
    Sync data that is neither event nor page based with Hubspot/Kissmetrics.

    Does nothing when both ``KISSMETRICS_ENABLED`` and ``HUBSPOT_ENABLED`` are
    falsy. Otherwise, web users whose last login is within the past 90 days
    (and who pass the query's ``analytics_enabled()`` filter) are fetched in
    pages of 100, newest ``date_joined`` first. For each user the largest
    form, mobile-user, export and report counts among the user's domains are
    computed and one JSON batch per page is submitted through
    ``submit_data_to_hub_and_kiss``.

    Users are skipped, with a rejection gauge emitted, when the domain part
    of their email or username is in the blocked email domains, or when any
    of their domains is in the blocked domains.

    The per-domain aggregations do not depend on the page being processed, so
    they are queried once up front. This also means the
    "domains with forms > threshold" gauge counts each domain once instead of
    once per page.

    :return: None
    """
    if not KISSMETRICS_ENABLED and not HUBSPOT_ENABLED:
        return

    # Start by getting a list of web users mapped to their domains
    three_months_ago = date.today() - timedelta(days=90)

    user_query = (UserES()
                  .web_users()
                  .last_logged_in(gte=three_months_ago)
                  .sort('date_joined', desc=True)
                  .source(['domains', 'email', 'date_joined', 'username'])
                  .analytics_enabled())

    total_users = user_query.count()
    chunk_size = 100
    num_chunks = int(math.ceil(float(total_users) / float(chunk_size)))

    blocked_domains = get_blocked_hubspot_domains()
    blocked_email_domains = get_blocked_hubspot_email_domains()

    # Loop-invariant lookups keyed by domain: query them once, not per chunk
    domains_to_forms = (FormES()
                        .terms_aggregation('domain.exact', 'domain')
                        .size(0)
                        .run()
                        .aggregations.domain.counts_by_bucket())
    domains_to_mobile_users = (UserES()
                               .mobile_users()
                               .terms_aggregation('domain.exact', 'domain')
                               .size(0)
                               .run()
                               .aggregations
                               .domain
                               .counts_by_bucket())

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track no of users and domains with max_forms greater than HUBSPOT_THRESHOLD
    hubspot_number_of_users = 0
    hubspot_number_of_domains_with_forms_gt_threshold = sum(
        1 for num_forms in domains_to_forms.values()
        if num_forms > HUBSPOT_THRESHOLD
    )

    for chunk in range(num_chunks):
        users_to_domains = (user_query
                            .size(chunk_size)
                            .start(chunk * chunk_size)
                            .run()
                            .hits)
        # users_to_domains is a list of dicts

        # For each web user, iterate through their domains and select the max number of form submissions and
        # max number of mobile workers
        submit = []
        for user in users_to_domains:
            email = user.get('email') or user.get('username')
            if not _email_is_valid(email):
                continue

            email_domain = email.split('@')[-1]
            if email_domain in blocked_email_domains:
                metrics_gauge(
                    'commcare.hubspot_data.rejected.periodic_task.email_domain',
                    1,
                    tags={
                        'email_domain': email_domain,
                    }
                )
                continue

            # A user may have an email but no username; treat a missing
            # username as empty instead of failing on None.split
            username_email_domain = (user.get('username') or '').split('@')[-1]
            if username_email_domain in blocked_email_domains:
                metrics_gauge(
                    'commcare.hubspot_data.rejected.periodic_task.username',
                    1,
                    tags={
                        'username': username_email_domain,
                    }
                )
                continue

            # NOTE(review): the user is counted before the blocked-domain
            # check below, so users rejected there are still included in
            # this total - confirm that is intended.
            hubspot_number_of_users += 1
            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0
            is_member_of_blocked_domain = False

            for domain in user['domains']:
                if domain in blocked_domains:
                    metrics_gauge(
                        'commcare.hubspot_data.rejected.periodic_task.domain',
                        1,
                        tags={
                            'domain': domain,
                        }
                    )
                    is_member_of_blocked_domain = True
                    break
                if domain in domains_to_forms and domains_to_forms[domain] > max_forms:
                    max_forms = domains_to_forms[domain]
                if domain in domains_to_mobile_users and domains_to_mobile_users[domain] > max_workers:
                    max_workers = domains_to_mobile_users[domain]
                # Call each count helper once per domain rather than twice
                max_export = max(max_export, _get_export_count(domain))
                max_report = max(max_report, _get_report_count(domain))

            if is_member_of_blocked_domain:
                continue

            project_spaces_created = ", ".join(get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [
                    {
                        'property': '{}max_form_submissions_in_a_domain'.format(env),
                        'value': max_forms
                    },
                    {
                        'property': '{}max_mobile_workers_in_a_domain'.format(env),
                        'value': max_workers
                    },
                    {
                        'property': '{}project_spaces_created_by_user'.format(env),
                        'value': project_spaces_created,
                    },
                    {
                        'property': '{}over_300_form_submissions'.format(env),
                        'value': max_forms > HUBSPOT_THRESHOLD
                    },
                    {
                        'property': '{}date_created'.format(env),
                        'value': date_created
                    },
                    {
                        'property': '{}max_exports_in_a_domain'.format(env),
                        'value': max_export
                    },
                    {
                        'property': '{}max_custom_reports_in_a_domain'.format(env),
                        'value': max_report
                    }
                ]
            }
            submit.append(user_json)

        # One submission per page of users
        submit_json = json.dumps(submit)
        submit_data_to_hub_and_kiss(submit_json)

    metrics_gauge('commcare.hubspot.web_users_processed', hubspot_number_of_users,
                  multiprocess_mode=MPM_LIVESUM)
    metrics_gauge(
        'commcare.hubspot.domains_with_forms_gt_threshold',
        hubspot_number_of_domains_with_forms_gt_threshold,
        multiprocess_mode=MPM_MAX
    )
def track_periodic_data():
    """
    Sync data that is neither event nor page based with Hubspot/Kissmetrics.

    Web users whose last login is within the past 90 days (and who pass the
    query's ``analytics_enabled()`` filter) are fetched in pages of 100,
    newest ``date_joined`` first. For each user with a valid email the
    largest form, mobile-user, export and report counts among the user's
    domains are computed, and one JSON batch per page is submitted through
    ``submit_data_to_hub_and_kiss``. Finally two datadog gauges are updated:
    the number of users processed and the number of domains whose form count
    exceeds ``HUBSPOT_THRESHOLD``.

    The per-domain aggregations do not depend on the page being processed, so
    they are queried once up front. This also means the threshold gauge
    counts each domain once instead of once per page.

    :return: None
    """
    # Start by getting a list of web users mapped to their domains
    three_months_ago = date.today() - timedelta(days=90)

    user_query = (UserES()
                  .web_users()
                  .last_logged_in(gte=three_months_ago)
                  .sort('date_joined', desc=True)
                  .source(['domains', 'email', 'date_joined'])
                  .analytics_enabled())

    total_users = user_query.count()
    chunk_size = 100
    num_chunks = int(math.ceil(float(total_users) / float(chunk_size)))

    # Loop-invariant lookups keyed by domain: query them once, not per chunk
    domains_to_forms = (FormES()
                        .terms_aggregation('domain', 'domain')
                        .size(0)
                        .run()
                        .aggregations.domain.counts_by_bucket())
    domains_to_mobile_users = (UserES()
                               .mobile_users()
                               .terms_aggregation('domain', 'domain')
                               .size(0)
                               .run()
                               .aggregations
                               .domain
                               .counts_by_bucket())

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track no of users and domains with max_forms greater than HUBSPOT_THRESHOLD
    hubspot_number_of_users = 0
    hubspot_number_of_domains_with_forms_gt_threshold = sum(
        1 for num_forms in domains_to_forms.values()
        if num_forms > HUBSPOT_THRESHOLD
    )

    for chunk in range(num_chunks):
        users_to_domains = (user_query
                            .size(chunk_size)
                            .start(chunk * chunk_size)
                            .run()
                            .hits)
        # users_to_domains is a list of dicts

        # For each web user, iterate through their domains and select the max number of form submissions and
        # max number of mobile workers
        submit = []
        for user in users_to_domains:
            email = user.get('email')
            if not _email_is_valid(email):
                continue

            hubspot_number_of_users += 1
            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0

            for domain in user['domains']:
                if domain in domains_to_forms and domains_to_forms[domain] > max_forms:
                    max_forms = domains_to_forms[domain]
                if domain in domains_to_mobile_users and domains_to_mobile_users[domain] > max_workers:
                    max_workers = domains_to_mobile_users[domain]
                # Call each count helper once per domain rather than twice
                max_export = max(max_export, _get_export_count(domain))
                max_report = max(max_report, _get_report_count(domain))

            project_spaces_created = ", ".join(get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [
                    {
                        'property': '{}max_form_submissions_in_a_domain'.format(env),
                        'value': max_forms
                    },
                    {
                        'property': '{}max_mobile_workers_in_a_domain'.format(env),
                        'value': max_workers
                    },
                    {
                        'property': '{}project_spaces_created_by_user'.format(env),
                        'value': project_spaces_created,
                    },
                    {
                        'property': '{}over_300_form_submissions'.format(env),
                        'value': max_forms > HUBSPOT_THRESHOLD
                    },
                    {
                        'property': '{}date_created'.format(env),
                        'value': date_created
                    },
                    {
                        'property': '{}max_exports_in_a_domain'.format(env),
                        'value': max_export
                    },
                    {
                        'property': '{}max_custom_reports_in_a_domain'.format(env),
                        'value': max_report
                    }
                ]
            }
            submit.append(user_json)

        # One submission per page of users
        submit_json = json.dumps(submit)
        submit_data_to_hub_and_kiss(submit_json)

    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE: hubspot_number_of_users,
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE: hubspot_number_of_domains_with_forms_gt_threshold
    })
def track_periodic_data():
    """
    Push per-user usage properties to Hubspot/Kissmetrics and report gauges.

    For each web user with a valid email whose last login is within the past
    180 days, the largest form count and largest mobile-user count among the
    user's domains are computed and added to a batch that is submitted as one
    JSON string. Afterwards two datadog gauges are updated: the number of
    users included, and the number of domains whose form count exceeds
    ``HUBSPOT_THRESHOLD``.

    :return: None
    """
    cutoff = date.today() - timedelta(days=180)

    # One dict per web user with "domains", "email" and "date_joined"
    recent_web_users = UserES().web_users().last_logged_in(gte=cutoff)
    web_users = recent_web_users.fields(["domains", "email", "date_joined"]).run().hits

    # Lookups keyed by domain, built from terms aggregations on "domain"
    form_aggregation = FormES().terms_aggregation("domain", "domain").size(0).run()
    form_counts = form_aggregation.aggregations.domain.counts_by_bucket()
    worker_aggregation = UserES().mobile_users().terms_aggregation("domain", "domain").size(0).run()
    worker_counts = worker_aggregation.aggregations.domain.counts_by_bucket()

    # Prefix that keeps india and www data separate
    prefix = get_instance_string()

    # Number of domains whose form count is above HUBSPOT_THRESHOLD
    domains_over_threshold = len(
        [count for count in form_counts.values() if count > HUBSPOT_THRESHOLD]
    )

    users_processed = 0
    payload = []
    for web_user in web_users:
        email = web_user.get("email")
        if not _email_is_valid(email):
            continue

        users_processed += 1
        joined_on = web_user.get("date_joined")
        user_domains = web_user["domains"]

        # Seeding with 0 keeps the result at 0 when no domain has a count
        top_forms = max(
            [0] + [form_counts[name] for name in user_domains if name in form_counts]
        )
        top_workers = max(
            [0] + [worker_counts[name] for name in user_domains if name in worker_counts]
        )

        created_spaces = ", ".join(get_domains_created_by_user(email))

        def named(suffix):
            # Full property name: instance prefix followed by the suffix
            return "{}{}".format(prefix, suffix)

        payload.append(
            {
                "email": email,
                "properties": [
                    {"property": named("max_form_submissions_in_a_domain"), "value": top_forms},
                    {"property": named("max_mobile_workers_in_a_domain"), "value": top_workers},
                    {"property": named("project_spaces_created_by_user"), "value": created_spaces},
                    {"property": named("over_300_form_submissions"), "value": top_forms > HUBSPOT_THRESHOLD},
                    {"property": named("date_created"), "value": joined_on},
                ],
            }
        )

    submit_data_to_hub_and_kiss(json.dumps(payload))

    gauges = {
        DATADOG_WEB_USERS_GAUGE: users_processed,
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE: domains_over_threshold,
    }
    update_datadog_metrics(gauges)
def track_periodic_data():
    """
    Sync data that is neither event nor page based with Hubspot/Kissmetrics.

    Does nothing when both ``KISSMETRICS_ENABLED`` and ``HUBSPOT_ENABLED`` are
    falsy. Otherwise, web users whose last login is within the past 90 days
    (and who pass the query's ``analytics_enabled()`` filter) are fetched in
    pages of 100, newest ``date_joined`` first. For each user with a valid
    email the largest form, mobile-user, export and report counts among the
    user's domains are computed, and one JSON batch per page is submitted
    through ``submit_data_to_hub_and_kiss``. Finally two datadog gauges are
    updated: the number of users processed and the number of domains whose
    form count exceeds ``HUBSPOT_THRESHOLD``.

    The per-domain aggregations do not depend on the page being processed, so
    they are queried once up front. This also means the threshold gauge
    counts each domain once instead of once per page.

    :return: None
    """
    if not KISSMETRICS_ENABLED and not HUBSPOT_ENABLED:
        return

    # Start by getting a list of web users mapped to their domains
    three_months_ago = date.today() - timedelta(days=90)

    user_query = (UserES().web_users().last_logged_in(
        gte=three_months_ago).sort('date_joined', desc=True).source(
            ['domains', 'email', 'date_joined']).analytics_enabled())

    total_users = user_query.count()
    chunk_size = 100
    num_chunks = int(math.ceil(float(total_users) / float(chunk_size)))

    # Loop-invariant lookups keyed by domain: query them once, not per chunk
    domains_to_forms = (FormES().terms_aggregation(
        'domain', 'domain').size(0).run().aggregations.domain.counts_by_bucket())
    domains_to_mobile_users = (UserES().mobile_users().terms_aggregation(
        'domain', 'domain').size(0).run().aggregations.domain.counts_by_bucket())

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track no of users and domains with max_forms greater than HUBSPOT_THRESHOLD
    hubspot_number_of_users = 0
    hubspot_number_of_domains_with_forms_gt_threshold = sum(
        1 for num_forms in domains_to_forms.values()
        if num_forms > HUBSPOT_THRESHOLD
    )

    for chunk in range(num_chunks):
        users_to_domains = (user_query.size(chunk_size).start(
            chunk * chunk_size).run().hits)
        # users_to_domains is a list of dicts

        # For each web user, iterate through their domains and select the max number of form submissions and
        # max number of mobile workers
        submit = []
        for user in users_to_domains:
            email = user.get('email')
            if not _email_is_valid(email):
                continue

            hubspot_number_of_users += 1
            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0

            for domain in user['domains']:
                if domain in domains_to_forms and domains_to_forms[
                        domain] > max_forms:
                    max_forms = domains_to_forms[domain]
                if domain in domains_to_mobile_users and domains_to_mobile_users[
                        domain] > max_workers:
                    max_workers = domains_to_mobile_users[domain]
                # Call each count helper once per domain rather than twice
                max_export = max(max_export, _get_export_count(domain))
                max_report = max(max_report, _get_report_count(domain))

            project_spaces_created = ", ".join(
                get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [{
                    'property': '{}max_form_submissions_in_a_domain'.format(env),
                    'value': max_forms
                }, {
                    'property': '{}max_mobile_workers_in_a_domain'.format(env),
                    'value': max_workers
                }, {
                    'property': '{}project_spaces_created_by_user'.format(env),
                    'value': project_spaces_created,
                }, {
                    'property': '{}over_300_form_submissions'.format(env),
                    'value': max_forms > HUBSPOT_THRESHOLD
                }, {
                    'property': '{}date_created'.format(env),
                    'value': date_created
                }, {
                    'property': '{}max_exports_in_a_domain'.format(env),
                    'value': max_export
                }, {
                    'property': '{}max_custom_reports_in_a_domain'.format(env),
                    'value': max_report
                }]
            }
            submit.append(user_json)

        # One submission per page of users
        submit_json = json.dumps(submit)
        submit_data_to_hub_and_kiss(submit_json)

    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE: hubspot_number_of_users,
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE: hubspot_number_of_domains_with_forms_gt_threshold
    })