def get_keen_activity():
    """Query Keen for per-node pageview and visit counts over ``this_7_days``.

    Both queries read the ``pageviews`` collection, group by ``node.id`` and
    only consider events for which ``node.id`` exists.

    Returns:
        dict: ``node_pageviews`` is the result of the ``count`` query and
        ``node_visits`` the result of the ``count_unique`` query on
        ``anon.id``.
    """
    public_keen = settings.KEEN['public']
    keen = KeenClient(
        project_id=public_keen['project_id'],
        read_key=public_keen['read_key'],
    )

    # Options shared by the two queries; only the aggregation differs.
    shared_query = {
        'event_collection': 'pageviews',
        'timeframe': 'this_7_days',
        'group_by': 'node.id',
        'filters': [{
            'property_name': 'node.id',
            'operator': 'exists',
            'property_value': True,
        }],
    }

    pageview_counts = keen.count(**shared_query)
    visit_counts = keen.count_unique(target_property='anon.id', **shared_query)

    return {'node_pageviews': pageview_counts, 'node_visits': visit_counts}
def get_keen_activity():
    """Return the last seven days of Keen pageview statistics grouped by node.

    Returns:
        dict: two entries, ``node_pageviews`` (``count`` of ``pageviews``
        events) and ``node_visits`` (``count_unique`` of ``anon.id`` over the
        same events). Both are grouped by ``node.id`` and skip events that
        have no ``node.id``.
    """
    keen_settings = settings.KEEN['public']
    client = KeenClient(
        project_id=keen_settings['project_id'],
        read_key=keen_settings['read_key'],
    )

    # Restrict both queries to events that carry a node id.
    node_id_exists = {
        'property_name': 'node.id',
        'operator': 'exists',
        'property_value': True,
    }
    weekly_by_node = dict(
        event_collection='pageviews',
        timeframe='this_7_days',
        group_by='node.id',
    )

    pageviews = client.count(filters=[node_id_exists], **weekly_by_node)
    visits = client.count_unique(
        target_property='anon.id',
        filters=[node_id_exists],
        **weekly_by_node
    )

    return {'node_pageviews': pageviews, 'node_visits': visits}
def get_keen_activity():
    """Collect per-node pageview and visit counts across every pageviews collection.

    One ``count`` and one ``count_unique`` (on ``anon.id``) query is issued
    for each collection named ``pageviews-<character>``, where the character
    ranges over ``digits + ascii_lowercase``. Every query covers
    ``this_7_days``, is grouped by ``node.id`` and ignores events without a
    ``node.id``. The per-collection results are concatenated.

    Returns:
        dict: ``node_pageviews`` and ``node_visits``, each a list holding the
        concatenated query results.
    """
    keen_settings = settings.KEEN['public']
    keen = KeenClient(
        project_id=keen_settings['project_id'],
        read_key=keen_settings['read_key'],
    )

    # Only events that carry a node id are of interest.
    node_id_exists = [{
        'property_name': 'node.id',
        'operator': 'exists',
        'property_value': True,
    }]

    pageviews_by_node = []
    visits_by_node = []

    for suffix in digits + ascii_lowercase:
        collection = 'pageviews-{}'.format(suffix)

        pageviews_by_node.extend(keen.count(
            event_collection=collection,
            timeframe='this_7_days',
            group_by='node.id',
            filters=node_id_exists,
        ))
        visits_by_node.extend(keen.count_unique(
            event_collection=collection,
            target_property='anon.id',
            timeframe='this_7_days',
            group_by='node.id',
            filters=node_id_exists,
        ))

    return {'node_pageviews': pageviews_by_node, 'node_visits': visits_by_node}
def calculate_stickiness(self, time_one, time_two):
    """Calculate the stickiness for date:

    (Unique users yesterday) / (Unique users over yesterday + 29 days)
    [total of 30 days]

    Args:
        time_one: start of the one-day window. Must support ``isoformat()``
            and subtraction of a ``timedelta``.
        time_two: end of both windows. Must support ``isoformat()``.

    Returns:
        ``0`` when no unique ``user.id`` was seen in the 30-day window,
        otherwise the one-day unique count divided by the 30-day unique
        count, as a float.
    """
    public_keen = settings.KEEN['public']
    client = KeenClient(
        project_id=public_keen['project_id'],
        read_key=public_keen['read_key'],
    )

    window_end = time_two.isoformat()
    # beginning of yesterday - 29 days = 30 total days
    thirty_day_start = (time_one - timedelta(days=29)).isoformat()

    last_thirty = client.count_unique(
        event_collection='pageviews',
        timeframe={'start': thirty_day_start, 'end': window_end},
        target_property='user.id',
        timezone='UTC',
    )
    last_one = client.count_unique(
        event_collection='pageviews',
        timeframe={'start': time_one.isoformat(), 'end': window_end},
        target_property='user.id',
        timezone='UTC',
    )

    # avoid unlikely divide by 0 error
    if last_thirty == 0:
        return 0

    # float() forces true division even where `/` on two ints floors
    # (Python 2 without `from __future__ import division`), which would
    # otherwise collapse the ratio to 0 or 1.
    return float(last_one) / last_thirty
def calculate_stickiness(self, time_one, time_two):
    """Calculate the stickiness for date:

    (Unique users yesterday) / (Unique users over yesterday + 29 days)
    [total of 30 days]

    Args:
        time_one: start of the single-day window; the 30-day window starts
            29 days before it. Must support ``isoformat()`` and subtraction
            of a ``timedelta``.
        time_two: shared end of both windows. Must support ``isoformat()``.

    Returns:
        ``0`` if the 30-day unique ``user.id`` count is zero, otherwise the
        single-day unique count divided by the 30-day unique count (float).
    """
    client = KeenClient(
        project_id=settings.KEEN['public']['project_id'],
        read_key=settings.KEEN['public']['read_key'],
    )
    time_two_iso = time_two.isoformat()

    def unique_users_since(start):
        """Count distinct ``user.id`` values in pageviews from ``start`` to ``time_two``."""
        return client.count_unique(
            event_collection='pageviews',
            timeframe={'start': start.isoformat(), 'end': time_two_iso},
            target_property='user.id',
            timezone='UTC',
        )

    # beginning of yesterday - 29 days = 30 total days
    last_thirty = unique_users_since(time_one - timedelta(days=29))
    last_one = unique_users_since(time_one)

    # avoid unlikely divide by 0 error
    if last_thirty == 0:
        return 0

    # Convert to float first: `/` on two ints floors under Python 2 unless
    # true division is enabled, which would reduce the ratio to 0 or 1.
    return float(last_one) / last_thirty
def __getattribute__(self, attr):
    """Resolve ``attr`` normally, passing it through ``_wrap`` if it is a wrapped method.

    An attribute whose name appears in ``wrapped_methods`` is returned as
    ``self._wrap(attribute)``; every other attribute is returned unchanged.
    """
    resolved = super(KeenCSVClient, self).__getattribute__(attr)

    # Read wrapped_methods through KeenClient.__getattribute__ instead of
    # `self.wrapped_methods`, which would re-enter this override.
    wrapped_names = KeenClient.__getattribute__(self, 'wrapped_methods')
    if attr not in wrapped_names:
        return resolved

    return self._wrap(resolved)
def keen_add_events(events):
    """Send ``events`` to Keen via ``KeenClient.add_events``.

    The client is built from ``settings.KEEN_PROJECT_ID`` and
    ``settings.KEEN_WRITE_KEY``. Nothing is returned.
    """
    writer = KeenClient(
        project_id=settings.KEEN_PROJECT_ID,
        write_key=settings.KEEN_WRITE_KEY,
    )
    writer.add_events(events)
def main(dry_run=True, batch_count=None, force=False):
    """Upload the pageviews to Keen.

    Exits early when the ``transform02`` history and complaints run ids
    differ, or when the ``transform02`` complaints file has any row after its
    header and ``force`` is false. Otherwise each batch is passed to
    ``load_batch_for`` for the ``private`` and then the ``public`` domain.
    Completed ``<domain>-<batch>`` ids are appended to ``resume.log`` in the
    load directory and skipped on later runs.

    Args:
        dry_run: when true, an ``Elasticsearch`` client is created, the
            ``script_settings.ES_INDEX`` index is deleted (errors are printed
            and ignored), and no Keen clients are built. When false, Keen
            write clients are built for both domains.
        batch_count: number of batches to process; defaults to
            ``utils.get_batch_count()`` when ``None``.
        force: continue even though the complaints file is not empty.
    """
    history_run_id = utils.get_history_run_id_for('transform02')
    complaints_run_id = utils.get_complaints_run_id_for('transform02')
    if history_run_id != complaints_run_id:
        print("You need to validate your first-phase transformed data! Bailing...")
        sys.exit()

    extract_complaints = utils.get_complaints_for('transform02', 'r')
    try:
        extract_complaints.readline()  # toss header
        has_complaints = bool(extract_complaints.readline())
    finally:
        extract_complaints.close()

    if has_complaints:
        print("You have unaddressed complaints in your second-phase transform!")
        if not force:
            print(" ...pass --force to ignore")
            sys.exit()

    resume_log_path = utils.get_dir_for('load') + '/resume.log'

    history_file = utils.get_history_for('load', 'a')
    try:
        history_file.write(script_settings.RUN_HEADER + '{}\n'.format(complaints_run_id))
        history_file.write('Beginning upload at: {}Z\n'.format(datetime.utcnow()))

        keen_clients = {'public': None, 'private': None}
        es_client = None
        if dry_run:
            print("Doing dry-run upload to Elastic search. Pass --for-reals to upload to Keen")
            es_client = Elasticsearch()
            try:
                es_client.indices.delete(script_settings.ES_INDEX)
            except Exception as exc:
                # Best effort: report the failure to delete and carry on.
                print(exc)
        else:
            keen_clients = {
                'public': KeenClient(
                    project_id=settings.KEEN['public']['project_id'],
                    write_key=settings.KEEN['public']['write_key'],
                ),
                'private': KeenClient(
                    project_id=settings.KEEN['private']['project_id'],
                    write_key=settings.KEEN['private']['write_key'],
                ),
            }

        tally = {}

        # Ids of the domain/batch files already uploaded by a previous run.
        seen = set()
        try:
            with open(resume_log_path, 'r') as resume_log:
                for seen_file in resume_log:
                    seen.add(seen_file.strip('\n'))
        except (IOError, OSError):
            # An unreadable or missing resume log just means nothing is
            # skipped; anything else (e.g. KeyboardInterrupt) propagates.
            pass

        batch_count = utils.get_batch_count() if batch_count is None else batch_count

        print("Beginning Upload")
        # Unbuffered text mode (buffering=0) is rejected by Python 3, so open
        # with default buffering and flush after every entry instead; each
        # completed upload still reaches the log immediately.
        with open(resume_log_path, 'a') as resume_log:
            for batch_id in range(1, batch_count + 1):
                print(" Batch {}".format(batch_id))
                for domain in ('private', 'public'):
                    print(" Domain: {}".format(domain))
                    file_id = '{}-{}'.format(domain, batch_id)
                    if file_id in seen:
                        print(" ...seen, skipping.\n")
                        continue

                    history_file.write(
                        'Uploading for {} project, batch {}'.format(domain, batch_id)
                    )
                    load_batch_for(batch_id, domain, tally, dry_run,
                                   es_client, keen_clients[domain])
                    resume_log.write('{}\n'.format(file_id))
                    resume_log.flush()
                    history_file.write(' ...finished\n')

        print("Finished Upload")
        history_file.write('Finished upload at: {}Z\n'.format(datetime.utcnow()))
        history_file.write('Tally was:\n')
        for k, v in sorted(tally.items()):
            history_file.write(' {}: {}\n'.format(k, v))
    finally:
        history_file.close()
def activity():
    """Gather the node lists and hit counts for the activity view.

    When a public Keen read key is configured, pageview counts and unique
    ``anon.id`` visit counts for ``this_7_days`` are fetched per ``node.id``.
    The first ``max_popular_projects`` rows are sorted by views and loaded to
    fill up to ten popular public projects and up to ten popular public
    registrations. Without a read key those lists and ``hits`` stay empty.

    Returns:
        dict with keys ``new_and_noteworthy_projects``,
        ``recent_public_registrations``, ``popular_public_projects``,
        ``popular_public_registrations`` and ``hits``. ``hits`` maps a node id
        to ``{'hits': <views>, 'visits': <unique visits>}``.
    """
    popular_public_projects = []
    popular_public_registrations = []
    hits = {}
    max_popular_projects = 20
    max_per_list = 10

    if settings.KEEN['public']['read_key']:
        client = KeenClient(
            project_id=settings.KEEN['public']['project_id'],
            read_key=settings.KEEN['public']['read_key'],
        )
        node_id_exists = [{
            'property_name': 'node.id',
            'operator': 'exists',
            'property_value': True,
        }]

        node_pageviews = client.count(
            event_collection='pageviews',
            timeframe='this_7_days',
            group_by='node.id',
            filters=node_id_exists,
        )
        node_visits = client.count_unique(
            event_collection='pageviews',
            target_property='anon.id',
            timeframe='this_7_days',
            group_by='node.id',
            filters=node_id_exists,
        )

        # NOTE(review): both result lists are truncated to their first
        # max_popular_projects rows *before* sorting by views. If Keen does
        # not return groups ordered by count, this is not the top N - confirm.
        visits_by_node = {
            visit['node.id']: visit['result']
            for visit in node_visits[0:max_popular_projects]
        }

        # A node missing from the visit slice gets 0 visits, so building
        # `hits` below cannot fail on an absent 'visits' key.
        node_data = [
            {
                'node': x['node.id'],
                'views': x['result'],
                'visits': visits_by_node.get(x['node.id'], 0),
            }
            for x in node_pageviews[0:max_popular_projects]
        ]
        node_data.sort(key=lambda datum: datum['views'], reverse=True)

        for nid in node_data:
            node = Node.load(nid['node'])
            if node is None:
                continue

            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < max_per_list:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted and not node.is_retracted:
                if len(popular_public_registrations) < max_per_list:
                    popular_public_registrations.append(node)

            if (len(popular_public_projects) >= max_per_list and
                    len(popular_public_registrations) >= max_per_list):
                break

        hits = {
            datum['node']: {
                'hits': datum['views'],
                'visits': datum['visits'],
            }
            for datum in node_data
        }

    # Projects
    new_and_noteworthy_pointers = Node.find_one(
        Q('_id', 'eq', settings.NEW_AND_NOTEWORTHY_LINKS_NODE)
    ).nodes_pointer
    new_and_noteworthy_projects = [pointer.node for pointer in new_and_noteworthy_pointers]

    return {
        'new_and_noteworthy_projects': new_and_noteworthy_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }
def activity():
    """Gather the node lists and hit counts for the activity view.

    When a public Keen read key is configured, pageview counts and unique
    ``anon.id`` visit counts for ``this_7_days`` are fetched per ``node.id``.
    The first ``max_popular_projects`` rows are sorted by views and loaded to
    fill up to ten popular public projects and up to ten popular public
    registrations. Without a read key those lists and ``hits`` stay empty.

    Returns:
        dict with keys ``recent_public_projects``,
        ``recent_public_registrations``, ``popular_public_projects``,
        ``popular_public_registrations`` and ``hits``. ``hits`` maps a node id
        to ``{'hits': <views>, 'visits': <unique visits>}``.
    """
    popular_public_projects = []
    popular_public_registrations = []
    hits = {}
    max_popular_projects = 20
    max_per_list = 10

    if settings.KEEN['public']['read_key']:
        client = KeenClient(
            project_id=settings.KEEN['public']['project_id'],
            read_key=settings.KEEN['public']['read_key'],
        )
        node_id_exists = [{
            'property_name': 'node.id',
            'operator': 'exists',
            'property_value': True,
        }]

        node_pageviews = client.count(
            event_collection='pageviews',
            timeframe='this_7_days',
            group_by='node.id',
            filters=node_id_exists,
        )
        node_visits = client.count_unique(
            event_collection='pageviews',
            target_property='anon.id',
            timeframe='this_7_days',
            group_by='node.id',
            filters=node_id_exists,
        )

        # NOTE(review): both result lists are truncated to their first
        # max_popular_projects rows *before* sorting by views. If Keen does
        # not return groups ordered by count, this is not the top N - confirm.
        visits_by_node = {
            visit['node.id']: visit['result']
            for visit in node_visits[0:max_popular_projects]
        }

        # A node missing from the visit slice gets 0 visits, so building
        # `hits` below cannot fail on an absent 'visits' key.
        node_data = [
            {
                'node': x['node.id'],
                'views': x['result'],
                'visits': visits_by_node.get(x['node.id'], 0),
            }
            for x in node_pageviews[0:max_popular_projects]
        ]
        node_data.sort(key=lambda datum: datum['views'], reverse=True)

        for nid in node_data:
            node = Node.load(nid['node'])
            if node is None:
                continue

            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < max_per_list:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted and not node.is_retracted:
                if len(popular_public_registrations) < max_per_list:
                    popular_public_registrations.append(node)

            if (len(popular_public_projects) >= max_per_list and
                    len(popular_public_registrations) >= max_per_list):
                break

        hits = {
            datum['node']: {
                'hits': datum['views'],
                'visits': datum['visits'],
            }
            for datum in node_data
        }

    # Projects

    # Only show top-level projects (any category) in new and noteworthy lists
    # This means that public children of private nodes will be excluded
    recent_query = (
        Q('parent_node', 'eq', None) &
        Q('is_public', 'eq', True) &
        CONTENT_NODE_QUERY
    )

    recent_public_projects = Node.find(
        recent_query &
        Q('is_registration', 'eq', False)
    ).sort(
        '-date_created'
    ).limit(10)

    return {
        'recent_public_projects': recent_public_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }