Пример #1
0
def get_group_name(urlString, req_type):
    try:
        url2 = urlString[urlString.find('profile/'):]
        url3 = url2[url2.find('/') + 1:]
        url4 = url3[url3.find('/') + 1:]
        return url4
    except:
        gc.connect_to_database()
        gc.create_session()
        url = req_type['filter']
        url2 = url[url.find('profile/'):]
        url3 = url2[url2.find('/') + 1:]
        group_guid = url3[:url3.find('/')]
        group_name = gc.groups.name_from_guid(group_guid)
        return group_name
Пример #2
0
def connect_to_collab():
    """
    Helper function to streamline connection to the GCcollab database
    """
    engine, conn = gccollab.connect_to_database()
    session, Base = gccollab.create_session()

    return engine, conn, session, Base
Пример #3
0
def main(testing=False):
    # Get JSON object from stdin
    if testing == False:
        req_obj = read_in()
    else:
        req_obj = json.loads(testing)

    # Pick apart request object
    req_type = req_obj['reqType']
    metric = req_obj['metric']
    metric2 = req_obj['metric2']
    start_time = req_obj['time']['startDate']
    end_time = req_obj['time']['endDate']
    all_time = req_obj['time']['allTime']

    #https://gcconnex.gc.ca/newsfeed/
    # https://gcconnex.gc.ca/groups/profile/24345174/endata-visualizationfr
    # https://gcconnex.gc.ca/groups/profile/272967/enblueprint-2020-objectif-2020fr
    # Start parsing the request. This is where it gets interesting / messy!
    # Type
    if req_type['category'] == 1:  # Groups
        url = req_type['filter']
        if metric == 1:  # Pageviews
            ga = gcga()
            ga.set_platform('gccollab')

            group_name = get_group_name(url, req_type)

            # Request a dataframe containing pageviews and corresponding dates
            ret = ga.pageviews([url, 'NOToffset'],
                               intervals=True,
                               start_date=start_time,
                               end_date=end_time)
            ret['group_name'] = group_name
            print(json.dumps(ret))

        elif metric == 2:  # Top content
            # Establish database connection
            gc.connect_to_database()
            gc.create_session()

            # Figure out group guid from url
            url2 = url[url.find('profile/'):]
            url3 = url2[url2.find('/') + 1:]
            group_guid = url3[:url3.find('/')]

            # Get the guids of all content within group
            guid_list = gc.content.get_top_content(group_guid)

            # Get the clean group name from the guid
            group_name = gc.groups.name_from_guid(group_guid)

            # Build regex string from guids
            regex_str = ''
            l = guid_list.values.astype(str).tolist()
            for i in range(len(l)):
                regex_str = regex_str + ('/' + l[i][0] + '/|'
                                         )  # ADD FIX TO GCCONNEX
            # Remove trailing pipe
            regex_str = regex_str[:-1]

            # Send google analytics request with regex string
            ga = gcga()
            ga.set_platform('gccollab')
            ret = ga.content_views(regex_str)

            # Format results and print to stdout
            ret['group_name'] = group_name
            print(json.dumps(ret))

        elif metric == 3:  # Number of members
            # Establish database connection
            gc.connect_to_database()
            gc.create_session()

            # Determine group guid
            group_guid = get_group_guid(url)

            # Query the database
            group_members = gc.groups.get_group_members(group_guid,
                                                        cleaned=False)
            group_name = gc.groups.name_from_guid(group_guid)

            # Get mungin'
            # Convert timjes to datetime objects
            group_members['time_created'] = group_members[
                'time_created'].apply(lambda x: pd.to_datetime(x))

            group_members.set_index('time_created', inplace=True)

            group_members = group_members[
                group_members.index > pd.to_datetime('2000-01-01')]

            # Daily
            group_members_daily = group_members['user_name'].groupby(
                pd.TimeGrouper(freq='D')).count().cumsum()
            group_members_daily = group_members_daily.reset_index()
            # Only keep current time selection
            group_members_daily = group_members_daily[
                group_members_daily['time_created'] >= pd.to_datetime(
                    start_time)]
            group_members_daily = group_members_daily[
                group_members_daily['time_created'] <= pd.to_datetime(
                    end_time)]

            # If the requested start date predates the oldest time on the dataframe, pad with 0s
            if min(group_members_daily['time_created']) > pd.to_datetime(
                    start_time):
                ix = pd.DatetimeIndex(start=pd.to_datetime(start_time),
                                      end=max(
                                          group_members_daily['time_created']),
                                      freq='D')
                group_members_daily = group_members_daily.set_index(
                    'time_created').reindex(ix, fill_value=0).reset_index()
                group_members_daily.rename(columns={'index': 'time_created'},
                                           inplace=True)

            group_members_daily['time_created'] = group_members_daily[
                'time_created'].apply(lambda x: x.strftime('%Y%m%d'))

            # Monthly
            group_members_monthly = group_members['user_name'].groupby(
                pd.TimeGrouper(freq='M')).count().cumsum()
            group_members_monthly = group_members_monthly.reset_index()
            # Only keep current time selection
            group_members_monthly = group_members_monthly[
                group_members_monthly['time_created'] >= pd.to_datetime(
                    start_time)]
            group_members_monthly = group_members_monthly[
                group_members_monthly['time_created'] <= pd.to_datetime(
                    end_time)]

            # (monthly) If the requested start date predates the oldest time on the dataframe, pad with 0s
            if min(group_members_monthly['time_created']) > pd.to_datetime(
                    start_time):
                ix = pd.DatetimeIndex(
                    start=pd.to_datetime(start_time),
                    end=max(group_members_monthly['time_created']),
                    freq='M')
                group_members_monthly = group_members_monthly.set_index(
                    'time_created').reindex(ix, fill_value=0).reset_index()
                group_members_monthly.rename(columns={'index': 'time_created'},
                                             inplace=True)

            group_members_monthly['time_created'] = group_members_monthly[
                'time_created'].apply(lambda x: x.strftime('%Y%m%d'))

            send_obj = {
                'monthly': {
                    'dates':
                    group_members_monthly['time_created'].values.tolist(),
                    'users':
                    group_members_monthly['user_name'].values.tolist()
                },
                'daily': {
                    'dates':
                    group_members_daily['time_created'].values.tolist(),
                    'users': group_members_daily['user_name'].values.tolist(),
                },
                'group_name': group_name
            }

            print(json.dumps(send_obj))

        elif metric == 4:  # Group members by department
            gc.connect_to_database()
            gc.create_session()

            # Determine group guid
            group_guid = get_group_guid(url)

            members = gc.groups.get_group_members(group_guid)

            # The index used below could be any column, not important
            members = members.groupby('department').count().reset_index(
            ).set_index('time_created').sort_index(
                ascending=False).reset_index()

            send_obj = {
                'departments': members['department'].values.tolist(),
                'members': members['time_created'].values.tolist()
            }
            print(json.dumps(send_obj))

    elif req_type['category'] == 2:  # Users
        if req_type['filter'] == 1:  # All users
            # This is for both 'Registered users' and 'Active users' queries.
            # (The queries only differ by one line)

            gc.connect_to_database()
            gc.create_session()

            users = gc.users.get_all()
            users['time_created'] = users['time_created'].apply(
                lambda x: pd.to_datetime(x))

            # Get monthly totals
            monthly = users.set_index('time_created').groupby(
                pd.TimeGrouper(freq='M')).count().cumsum().reset_index()[[
                    'time_created', 'email'
                ]]
            monthly['time_created'] = monthly['time_created'].apply(
                lambda x: x.strftime('%Y%m%d'))

            # Get daily totals
            daily = users.set_index('time_created').groupby(
                pd.TimeGrouper(freq='D')).count().cumsum().reset_index()[[
                    'time_created', 'email'
                ]]
            daily['time_created'] = daily['time_created'].apply(
                lambda x: x.strftime('%Y%m%d'))

            send_obj = {
                'monthly': {
                    'dates': monthly['time_created'].values.tolist(),
                    'users': monthly['email'].values.tolist()
                },
                'daily': {
                    'dates': daily['time_created'].values.tolist(),
                    'users': daily['email'].values.tolist()
                }
            }
            print(json.dumps(send_obj))

        elif req_type['filter'] == 2:  # Users from a particular department
            if metric2 == 1:  # Number of users from particular department
                gc.connect_to_database()
                gc.create_session()

                users = gc.users.department()
                users['time_created'] = users['time_created'].apply(
                    lambda x: pd.to_datetime(x))

                # Only keep users from indicated department
                # Need to import that list of deps to parse the number from
                dep_list = []
                with open('en_dpts.json') as depts:
                    depts_dict = json.load(depts)
                    for dep in depts_dict.values():
                        dep_list.append(dep)
                # Figure out the corresponding index
                dep = dep_list[metric]

                users = users[users['string'].str.contains(dep)]

                # Get monthly totals
                monthly = users.set_index('time_created').groupby(
                    pd.TimeGrouper(freq='M')).count().cumsum().reset_index()[[
                        'time_created', 'email'
                    ]]
                monthly['time_created'] = monthly['time_created'].apply(
                    lambda x: x.strftime('%Y%m%d'))

                # Get daily totals
                daily = users.set_index('time_created').groupby(
                    pd.TimeGrouper(freq='D')).count().cumsum().reset_index()[[
                        'time_created', 'email'
                    ]]
                daily['time_created'] = daily['time_created'].apply(
                    lambda x: x.strftime('%Y%m%d'))

                send_obj = {
                    'monthly': {
                        'dates': monthly['time_created'].values.tolist(),
                        'users': monthly['email'].values.tolist()
                    },
                    'daily': {
                        'dates': daily['time_created'].values.tolist(),
                        'users': daily['email'].values.tolist()
                    }
                }
                print(json.dumps(send_obj))
            elif metric2 == 2:  # Users opted in to oppurtunities platform from particular department
                gc.connect_to_database()
                gc.create_session()

                users = gc.micromissions.get_users()

                dep_list = []
                with open('en_dpts.json') as depts:
                    depts_dict = json.load(depts)
                    for dep in depts_dict.values():
                        dep_list.append(dep)
                # Figure out the corresponding index
                dep = dep_list[metric]

                users = users[users['department'].str.contains(dep)]
                users = users[users['opt-in'].str.contains('yes')]

                # Get monthly totals
                monthly = users.set_index('time_created').groupby(
                    pd.TimeGrouper(freq='M')).count().cumsum().reset_index()[[
                        'time_created', 'email'
                    ]]
                monthly['time_created'] = monthly['time_created'].apply(
                    lambda x: x.strftime('%Y%m%d'))

                # Get daily totals
                daily = users.set_index('time_created').groupby(
                    pd.TimeGrouper(freq='D')).count().cumsum().reset_index()[[
                        'time_created', 'email'
                    ]]
                daily['time_created'] = daily['time_created'].apply(
                    lambda x: x.strftime('%Y%m%d'))

                send_obj = {
                    'monthly': {
                        'dates': monthly['time_created'].values.tolist(),
                        'users': monthly['email'].values.tolist()
                    },
                    'daily': {
                        'dates': daily['time_created'].values.tolist(),
                        'users': daily['email'].values.tolist()
                    }
                }
                print(json.dumps(send_obj))