示例#1
0
def get_keen_activity():
    """Query Keen for per-node pageview and visit counts.

    Both queries read the 'pageviews' collection with the 'this_7_days'
    timeframe, group by 'node.id', and only consider events on which
    'node.id' exists.  Visits are a unique count of 'anon.id'.

    Returns a dict holding the two raw query results under the keys
    'node_pageviews' and 'node_visits'.
    """
    public_keen = settings.KEEN['public']
    keen = KeenClient(
        project_id=public_keen['project_id'],
        read_key=public_keen['read_key'],
    )

    def node_id_exists():
        # Build a fresh filter list per query so the calls share no mutable state.
        return [{
            'property_name': 'node.id',
            'operator': 'exists',
            'property_value': True,
        }]

    pageview_counts = keen.count(
        event_collection='pageviews',
        timeframe='this_7_days',
        group_by='node.id',
        filters=node_id_exists(),
    )
    visit_counts = keen.count_unique(
        event_collection='pageviews',
        target_property='anon.id',
        timeframe='this_7_days',
        group_by='node.id',
        filters=node_id_exists(),
    )

    return {'node_pageviews': pageview_counts, 'node_visits': visit_counts}
示例#2
0
文件: utils.py 项目: mfraezz/osf.io
def get_keen_activity():
    """Fetch node pageview and unique-visit counts from Keen.

    Runs a ``count`` and a ``count_unique`` (on 'anon.id') against the
    'pageviews' collection.  Both use the 'this_7_days' timeframe, group
    by 'node.id', and filter to events where 'node.id' exists.

    Returns ``{'node_pageviews': <count result>, 'node_visits': <count_unique result>}``.
    """
    client = KeenClient(
        project_id=settings.KEEN['public']['project_id'],
        read_key=settings.KEEN['public']['read_key'],
    )

    node_id_present = {
        'property_name': 'node.id',
        'operator': 'exists',
        'property_value': True,
    }
    # Arguments common to both queries; each call gets its own copy of the filter.
    query_args = {
        'event_collection': 'pageviews',
        'timeframe': 'this_7_days',
        'group_by': 'node.id',
    }

    result = {}
    result['node_pageviews'] = client.count(
        filters=[dict(node_id_present)],
        **query_args
    )
    result['node_visits'] = client.count_unique(
        target_property='anon.id',
        filters=[dict(node_id_present)],
        **query_args
    )
    return result
示例#3
0
def get_keen_activity():
    """Collect node pageview and visit counts across the sharded collections.

    Iterates over every character in ``digits + ascii_lowercase`` and, for
    each one, queries the collection named 'pageviews-<character>' twice:
    a ``count`` and a ``count_unique`` on 'anon.id'.  Both use the
    'this_7_days' timeframe, group by 'node.id', and keep only events where
    'node.id' exists.  The per-collection results are concatenated.

    Returns a dict with the combined lists under 'node_pageviews' and
    'node_visits'.
    """
    keen_public = settings.KEEN['public']
    client = KeenClient(
        project_id=keen_public['project_id'],
        read_key=keen_public['read_key'],
    )

    all_pageviews = []
    all_visits = []
    for suffix in digits + ascii_lowercase:
        collection = 'pageviews-{}'.format(suffix)

        # Pageview query first, then visits, for each collection in turn.
        all_pageviews.extend(client.count(
            event_collection=collection,
            timeframe='this_7_days',
            group_by='node.id',
            filters=[{
                'property_name': 'node.id',
                'operator': 'exists',
                'property_value': True,
            }],
        ))

        all_visits.extend(client.count_unique(
            event_collection=collection,
            target_property='anon.id',
            timeframe='this_7_days',
            group_by='node.id',
            filters=[{
                'property_name': 'node.id',
                'operator': 'exists',
                'property_value': True,
            }],
        ))

    return {'node_pageviews': all_pageviews, 'node_visits': all_visits}
示例#4
0
    def calculate_stickiness(self, time_one, time_two):
        """Return the ratio of unique users in a short window to those in a long one.

        Both windows end at ``time_two``.  The short window starts at
        ``time_one``; the long window starts 29 days before ``time_one``.
        Unique users are counted on 'user.id' in the 'pageviews' collection,
        with the timezone given as 'UTC'.

        Returns 0 when the long-window count equals 0, otherwise
        ``short_count / long_count``.
        """
        keen_public = settings.KEEN['public']
        client = KeenClient(
            project_id=keen_public['project_id'],
            read_key=keen_public['read_key'],
        )

        window_end = time_two.isoformat()

        def unique_users_since(start):
            # Count distinct user ids between `start` and the shared window end.
            return client.count_unique(
                event_collection='pageviews',
                timeframe={'start': start.isoformat(), 'end': window_end},
                target_property='user.id',
                timezone='UTC',
            )

        # Going back 29 days from time_one is what the original comment
        # describes as a 30-day total.
        users_long_window = unique_users_since(time_one - timedelta(days=29))
        users_short_window = unique_users_since(time_one)

        # Guard against dividing by zero when nobody was seen in the long window.
        return 0 if users_long_window == 0 else users_short_window / users_long_window
示例#5
0
    def calculate_stickiness(self, time_one, time_two):
        """Compute stickiness: unique users since ``time_one`` over unique users since 29 days earlier.

        Two ``count_unique`` queries on 'user.id' are run against the
        'pageviews' collection (timezone 'UTC').  Both end at ``time_two``.
        The denominator window begins 29 days before ``time_one``; the
        numerator window begins at ``time_one``.

        Returns 0 if the denominator equals 0, else numerator / denominator.
        """
        client = KeenClient(
            project_id=settings.KEEN['public']['project_id'],
            read_key=settings.KEEN['public']['read_key'],
        )

        end_iso = time_two.isoformat()
        # Keyword arguments identical for both queries; only the timeframe differs.
        shared_args = {
            'event_collection': 'pageviews',
            'target_property': 'user.id',
            'timezone': 'UTC',
        }

        # time_one minus 29 days: noted in the original as 30 total days.
        thirty_day_frame = {
            'start': (time_one - timedelta(days=29)).isoformat(),
            'end': end_iso,
        }
        thirty_day_users = client.count_unique(timeframe=thirty_day_frame, **shared_args)

        one_day_frame = {'start': time_one.isoformat(), 'end': end_iso}
        one_day_users = client.count_unique(timeframe=one_day_frame, **shared_args)

        # An empty long window would otherwise cause a division by zero.
        if thirty_day_users == 0:
            return 0
        ratio = one_day_users / thirty_day_users
        return ratio
示例#6
0
 def __getattribute__(self, attr):
     """Resolve *attr* normally; pass it through ``_wrap`` if it is listed in ``wrapped_methods``."""
     resolved = super(KeenCSVClient, self).__getattribute__(attr)
     # Read the list via KeenClient.__getattribute__ so this override is not re-entered for it.
     names_to_wrap = KeenClient.__getattribute__(self, 'wrapped_methods')
     if attr not in names_to_wrap:
         return resolved
     return self._wrap(resolved)
示例#7
0
def keen_add_events(events):
    """Send *events* to Keen through a client built from the project id and write key in settings."""
    writer = KeenClient(
        project_id=settings.KEEN_PROJECT_ID,
        write_key=settings.KEEN_WRITE_KEY,
    )
    writer.add_events(events)
示例#8
0
def main(dry_run=True, batch_count=None, force=False):
    """Upload the pageviews to Keen.

    Steps, in order:

    1. Exit if the 'transform02' history run id and complaints run id differ.
    2. Exit if the 'transform02' complaints file has any line after its
       header, unless ``force`` is true.
    3. Append a run header and start time to the 'load' history file.
    4. Set up either an Elasticsearch client (``dry_run``) or a public and
       a private ``KeenClient``.
    5. For every batch id in ``1..batch_count`` and each domain
       ('private', then 'public'), call ``load_batch_for`` unless that
       ``<domain>-<batch_id>`` pair is already listed in ``resume.log``.
       Each completed pair is appended to ``resume.log``.
    6. Write the finish time and the sorted tally to the history file.

    :param dry_run: when true, the Elasticsearch index named by
        ``script_settings.ES_INDEX`` is deleted (best effort) and an
        Elasticsearch client is passed to ``load_batch_for`` instead of
        Keen clients.
    :param batch_count: number of batches to process; when ``None`` it is
        taken from ``utils.get_batch_count()``.
    :param force: continue even when the complaints file is non-empty.
    """

    history_run_id = utils.get_history_run_id_for('transform02')
    complaints_run_id = utils.get_complaints_run_id_for('transform02')
    if history_run_id != complaints_run_id:
        print(
            "You need to validate your first-phase transformed data! Bailing..."
        )
        sys.exit()

    extract_complaints = utils.get_complaints_for('transform02', 'r')
    extract_complaints.readline()  # toss header
    if extract_complaints.readline():
        print(
            "You have unaddressed complaints in your second-phase transform!")
        if not force:
            print("  ...pass --force to ignore")
            sys.exit()

    history_file = utils.get_history_for('load', 'a')
    history_file.write(script_settings.RUN_HEADER +
                       '{}\n'.format(complaints_run_id))
    history_file.write('Beginning upload at: {}Z\n'.format(datetime.utcnow()))

    keen_clients = {'public': None, 'private': None}
    es_client = None
    if dry_run:
        print(
            "Doing dry-run upload to Elastic search.  Pass --for-reals to upload to Keen"
        )
        es_client = Elasticsearch()
        try:
            es_client.indices.delete(script_settings.ES_INDEX)
        except Exception as exc:
            # Best effort: a failed delete is reported and the run continues.
            print(exc)
    else:
        keen_clients = {
            'public':
            KeenClient(
                project_id=settings.KEEN['public']['project_id'],
                write_key=settings.KEEN['public']['write_key'],
            ),
            'private':
            KeenClient(
                project_id=settings.KEEN['private']['project_id'],
                write_key=settings.KEEN['private']['write_key'],
            )
        }

    tally = {}
    seen = set()
    resume_path = utils.get_dir_for('load') + '/resume.log'

    # A missing or unreadable resume log just means nothing is skipped.
    # Only I/O errors are tolerated here, so that KeyboardInterrupt and
    # programming errors are no longer silently swallowed.
    try:
        with open(resume_path, 'r') as resume_log:
            for seen_file in resume_log:
                seen.add(seen_file.strip('\n'))
    except (IOError, OSError):
        pass

    if batch_count is None:
        batch_count = utils.get_batch_count()
    print("Beginning Upload")
    # Line buffering (1) flushes after every newline-terminated entry, so each
    # finished file id reaches the log immediately.  Unbuffered text mode
    # (buffering=0) is rejected with ValueError on Python 3.
    with open(resume_path, 'a', 1) as resume_log:
        for batch_id in range(1, batch_count + 1):
            print("  Batch {}".format(batch_id))
            for domain in ('private', 'public'):
                print("    Domain: {}".format(domain))
                file_id = '{}-{}'.format(domain, batch_id)
                if file_id in seen:
                    print("  ...seen, skipping.\n")
                    continue

                # No trailing newline: '  ...finished' completes this line below.
                history_file.write('Uploading for {} project, batch {}'.format(
                    domain, batch_id))
                load_batch_for(batch_id, domain, tally, dry_run, es_client,
                               keen_clients[domain])
                resume_log.write('{}\n'.format(file_id))
                history_file.write('  ...finished\n')

    print("Finished Upload")
    history_file.write('Finished upload at: {}Z\n'.format(datetime.utcnow()))
    history_file.write('Tally was:\n')
    for k, v in sorted(tally.items()):
        history_file.write('  {}: {}\n'.format(k, v))
示例#9
0
文件: views.py 项目: scooley/osf.io
def activity():
    """Build the data for the activity page.

    When a public Keen read key is configured, two queries are run against
    the 'pageviews' collection ('this_7_days', grouped by 'node.id', only
    events where 'node.id' exists): a plain count (views) and a unique
    count of 'anon.id' (visits).  The first ``max_popular_projects`` rows
    of each result are combined per node, sorted by views descending, and
    used to pick up to 10 public projects and up to 10 public,
    non-retracted registrations.  Deleted or unloadable nodes are skipped.

    A node with no matching row in the visits slice is given 0 visits;
    previously this case raised ``KeyError`` while building ``hits``.

    Returns a dict with keys 'new_and_noteworthy_projects',
    'recent_public_registrations', 'popular_public_projects',
    'popular_public_registrations' and 'hits' (node id ->
    ``{'hits': views, 'visits': visits}``; empty when no read key is set).
    """

    popular_public_projects = []
    popular_public_registrations = []
    hits = {}
    max_popular_projects = 20

    if settings.KEEN['public']['read_key']:
        client = KeenClient(
            project_id=settings.KEEN['public']['project_id'],
            read_key=settings.KEEN['public']['read_key'],
        )

        node_pageviews = client.count(
            event_collection='pageviews',
            timeframe='this_7_days',
            group_by='node.id',
            filters=[
                {
                    'property_name': 'node.id',
                    'operator': 'exists',
                    'property_value': True
                }
            ]
        )

        node_visits = client.count_unique(
            event_collection='pageviews',
            target_property='anon.id',
            timeframe='this_7_days',
            group_by='node.id',
            filters=[
                {
                    'property_name': 'node.id',
                    'operator': 'exists',
                    'property_value': True
                }
            ]
        )

        node_data = [{'node': x['node.id'], 'views': x['result']} for x in node_pageviews[0:max_popular_projects]]

        # Index the visits slice by node id: one lookup per node instead of a
        # nested scan (a later duplicate id overrides an earlier one, as before).
        visits_by_node = {
            node_visit['node.id']: node_visit['result']
            for node_visit in node_visits[0:max_popular_projects]
        }
        for node_result in node_data:
            # Default to 0 so every entry has a 'visits' key for `hits` below.
            node_result['visits'] = visits_by_node.get(node_result['node'], 0)

        node_data.sort(key=lambda datum: datum['views'], reverse=True)

        for nid in node_data:
            node = Node.load(nid['node'])
            if node is None:
                continue
            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < 10:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted and not node.is_retracted:
                if len(popular_public_registrations) < 10:
                    popular_public_registrations.append(node)
            # Stop loading nodes once both lists are full.
            if len(popular_public_projects) >= 10 and len(popular_public_registrations) >= 10:
                break

        hits = {
            datum['node']: {
                'hits': datum['views'],
                'visits': datum['visits']
            } for datum in node_data
        }

    # Projects

    new_and_noteworthy_pointers = Node.find_one(Q('_id', 'eq', settings.NEW_AND_NOTEWORTHY_LINKS_NODE)).nodes_pointer
    new_and_noteworthy_projects = [pointer.node for pointer in new_and_noteworthy_pointers]

    return {
        'new_and_noteworthy_projects': new_and_noteworthy_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }
示例#10
0
文件: views.py 项目: atelic/osf.io
def activity():
    """Build the data for the activity page.

    When a public Keen read key is configured, two queries are run against
    the 'pageviews' collection ('this_7_days', grouped by 'node.id', only
    events where 'node.id' exists): a plain count (views) and a unique
    count of 'anon.id' (visits).  The first ``max_popular_projects`` rows
    of each result are combined per node, sorted by views descending, and
    used to pick up to 10 public projects and up to 10 public,
    non-retracted registrations.  Deleted or unloadable nodes are skipped.

    A node with no matching row in the visits slice is given 0 visits;
    previously this case raised ``KeyError`` while building ``hits``.

    The 10 most recently created public, top-level, non-registration nodes
    matching ``CONTENT_NODE_QUERY`` are also looked up.

    Returns a dict with keys 'recent_public_projects',
    'recent_public_registrations', 'popular_public_projects',
    'popular_public_registrations' and 'hits' (node id ->
    ``{'hits': views, 'visits': visits}``; empty when no read key is set).
    """

    popular_public_projects = []
    popular_public_registrations = []
    hits = {}
    max_popular_projects = 20

    if settings.KEEN['public']['read_key']:
        client = KeenClient(
            project_id=settings.KEEN['public']['project_id'],
            read_key=settings.KEEN['public']['read_key'],
        )

        node_pageviews = client.count(
            event_collection='pageviews',
            timeframe='this_7_days',
            group_by='node.id',
            filters=[
                {
                    'property_name': 'node.id',
                    'operator': 'exists',
                    'property_value': True
                }
            ]
        )

        node_visits = client.count_unique(
            event_collection='pageviews',
            target_property='anon.id',
            timeframe='this_7_days',
            group_by='node.id',
            filters=[
                {
                    'property_name': 'node.id',
                    'operator': 'exists',
                    'property_value': True
                }
            ]
        )

        node_data = [{'node': x['node.id'], 'views': x['result']} for x in node_pageviews[0:max_popular_projects]]

        # Index the visits slice by node id: one lookup per node instead of a
        # nested scan (a later duplicate id overrides an earlier one, as before).
        visits_by_node = {
            node_visit['node.id']: node_visit['result']
            for node_visit in node_visits[0:max_popular_projects]
        }
        for node_result in node_data:
            # Default to 0 so every entry has a 'visits' key for `hits` below.
            node_result['visits'] = visits_by_node.get(node_result['node'], 0)

        node_data.sort(key=lambda datum: datum['views'], reverse=True)

        for nid in node_data:
            node = Node.load(nid['node'])
            if node is None:
                continue
            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < 10:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted and not node.is_retracted:
                if len(popular_public_registrations) < 10:
                    popular_public_registrations.append(node)
            # Stop loading nodes once both lists are full.
            if len(popular_public_projects) >= 10 and len(popular_public_registrations) >= 10:
                break

        hits = {
            datum['node']: {
                'hits': datum['views'],
                'visits': datum['visits']
            } for datum in node_data
        }

    # Projects

    # Only show top-level projects (any category) in new and noteworthy lists
    # This means that public children of private nodes will be excluded
    recent_query = (
        Q('parent_node', 'eq', None) &
        Q('is_public', 'eq', True) &
        CONTENT_NODE_QUERY
    )

    recent_public_projects = Node.find(
        recent_query &
        Q('is_registration', 'eq', False)
    ).sort(
        '-date_created'
    ).limit(10)

    return {
        'recent_public_projects': recent_public_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }