Then run this script on the above collection

Usage:

python activities_with_lower_completion.py

'''
from collections import defaultdict

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

# Connect to MongoDB and extract the per-user, per-problem attempts collection
connection = EdXConnection('user_attempts_per_problem_id')
collection = connection.get_access_to_collection()
cursor = collection['user_attempts_per_problem_id'].find()

# Maps problem id -> {'correct': count, 'incorrect': count}; a counter that
# was never incremented reads as 0.
result = defaultdict(lambda: defaultdict(int))
for document in cursor:
    # Every document is tallied exactly once: under 'correct' when 'correct'
    # is found in its attempts, under 'incorrect' otherwise.
    outcome = 'correct' if 'correct' in document['attempts'] else 'incorrect'
    result[document['_id']['problem_id']][outcome] += 1

# One CSV row per problem id: [problem id, correct count, incorrect count]
csv_result = []
for problem_key, counts in result.items():
    csv_result.append([problem_key, counts['correct'], counts['incorrect']])

output = CSV(
    csv_result,
    ['Problem Id', 'Correct Count', 'Incorrect Count'],
    output_file='activities_with_lower_completion.csv')
output.generate_csv()
# Build one report row per document in the cursor: eight fixed metadata
# columns followed by the submitted answer for every input of the problem.
result = []
for document in cursor:
    event = document['event']
    # Order the input ids by their second-to-last '_'-separated component,
    # compared as an integer, so the answer columns come out in numeric order.
    input_ids = sorted(event['correct_map'].keys(),
                       key=lambda x: int(x.split('_')[-2]))
    answers = []
    for key in input_ids:
        try:
            answers.append(event['submission'][key]['answer'])
        except KeyError:
            # Nothing recorded for this input: keep the column, leave it blank
            answers.append('')
    result.append([
        document['hash_id'],
        document['username'],
        event['attempts'],
        document['module']['display_name'],
        document['time'],
        event['success'],
        event['grade'],
        event['max_grade'],
    ] + answers)

if final_attempts:
    # Keep, for each hash id (column 0), only the row with the highest attempt
    # number (column 2). The previous key, column 1, is the username, which is
    # constant within a hash-id group, so it never selected the last attempt.
    # groupby only merges adjacent rows, hence the sort on the same key first.
    result = [
        max(items, key=lambda x: x[2]) for key, items in groupby(
            sorted(result, key=lambda x: x[0]), lambda x: x[0])
    ]

csv_report_name = _generate_name_from_problem_id(problem_id, display_name)
fixed_columns = [
    'Hash ID', 'Username', 'Attempt Number', 'Module', 'Time', 'Success',
    'Grade Achieved', 'Max Grade'
]
# The trailing columns are the problem's input ids, one per answer column
output = CSV(result, fixed_columns + problem_ids, output_file=csv_report_name)
output.generate_csv()
Usage:

python activities_with_lower_completion.py

'''
from collections import defaultdict

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

# Connect to MongoDB and extract the attempts-per-problem collection
connection = EdXConnection('user_attempts_per_problem_id')
collection = connection.get_access_to_collection()

cursor = collection['user_attempts_per_problem_id'].find()

# problem id -> counters keyed by 'correct' / 'incorrect' (missing reads as 0)
result = defaultdict(lambda: defaultdict(int))
for document in cursor:
    # Count the document once: as correct if 'correct' appears in its
    # attempts, as incorrect otherwise.
    if 'correct' in document['attempts']:
        status = 'correct'
    else:
        status = 'incorrect'
    per_problem = result[document['_id']['problem_id']]
    per_problem[status] += 1

report_columns = ['Problem Id', 'Correct Count', 'Incorrect Count']
csv_result = [[problem_key, tally['correct'], tally['incorrect']]
              for problem_key, tally in result.items()]
output = CSV(csv_result, report_columns,
             output_file='activities_with_lower_completion.csv')
output.generate_csv()
    '$match': {
        'username': {
            '$in': usernames
        },
        '$or': [{
            'event_type': 'play_video'
        }, {
            'event_type': 'problem_check',
            'event_source': 'server'
        }]
    }
}, {
    '$group': {
        '_id': {
            "username": "******",
            "chapter_name": "$parent_data.chapter_display_name",
            "sequential_name": "$parent_data.sequential_display_name",
            "vertical_name": "$parent_data.vertical_display_name"
        }
    }
}])  #, {'$out' : 'students_50_to_59_events'}])

# Flatten each aggregated group id into a CSV row, skipping any group whose
# id carries no chapter name.
result = []
for document in cursor['result']:
    group_id = document['_id']
    if 'chapter_name' not in group_id:
        continue
    result.append([
        group_id['username'],
        group_id['chapter_name'],
        group_id['sequential_name'],
        group_id['vertical_name'],
    ])

report_columns = [
    'Username', 'Chapter Name', 'Sequential Name', 'Vertical Name'
]
output = CSV(result, report_columns,
             output_file='failure_analysis_50_to_59.csv')
output.generate_csv()
示例#5
0
from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

connection = EdXConnection('seek_video')
collection = connection.get_access_to_collection()
# NOTE: these sort keys are defined here but are not passed to the query below.
sort_parameters = [
    ('parent_data.chapter_display_name', 1),
    ('parent_data.sequential_display_name', 1),
    ('parent_data.vertical_display_name', 1),
]
cursor = collection['seek_video'].find()

# One row per document; an event without 'old_time' is reported with 0.
result = []
for item in cursor:
    old_time = item['event'].get('old_time', 0)
    result.append([
        item['username'],
        item['parent_data']['chapter_display_name'],
        item['parent_data']['sequential_display_name'],
        item['parent_data']['vertical_display_name'],
        old_time,
        item['event']['new_time'],
    ])

report_columns = [
    'Username', 'Chapter Name', 'Sequential Name', 'Vertical Name',
    'Old Time', 'New Time'
]
output = CSV(result, report_columns, output_file='seek_video.csv',
             row_limit=200000)
output.generate_csv()
示例#6
0
Since a very large number of documents has to be sorted, first aggregate all the required
documents into a separate collection and then extract the results from that new collection.

Command to run on the mongo shell to create new collection:

db.tracking_atoc185x.aggregate([{$match : {$and : [{"event_type" : "speed_change_video"},{ "parent_data": { $exists: true } }]}}, {$sort : {"parent_data.chapter_display_name" : 1, "parent_data.sequential_display_name" : 1, "parent_data.vertical_display_name" : 1}}, {$out : "speed_change_video_data"}], {allowDiskUse : true})

Usage: 
python speed_change_video.py

'''

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

connection = EdXConnection('speed_change_video_data')
collection = connection.get_access_to_collection()
cursor = collection['speed_change_video_data'].find()

# One row per document: who changed the playback speed, where in the course
# structure, and the old and new speed values.
result = []
for item in cursor:
    username = item['username']
    location = item['parent_data']
    result.append([
        username,
        location['chapter_display_name'],
        location['sequential_display_name'],
        location['vertical_display_name'],
        item['event']['old_speed'],
        item['event']['new_speed'],
    ])

report_columns = [
    'Username', 'Chapter Name', 'Sequential Name', 'Vertical Name',
    'Old Speed', 'New Speed'
]
output = CSV(result, report_columns, output_file='speed_change.csv')
output.generate_csv()
# Command-line arguments: argv[1] is the database name, argv[2] is the path of
# the input CSV whose first column holds usernames.
db_name = sys.argv[1]

# Change name of collection as required
connection = EdXConnection(db_name, 'user_id_map')
collection = connection.get_access_to_collection()

with open(sys.argv[2]) as f:
    reader = csv.reader(f)
    # Parse the header through the csv reader as well, so the last column name
    # does not keep the trailing newline and quoted commas are handled.
    headers = next(reader)
    data = list(reader)

# Prefix every input row with the user id and hash id found for its username
result = []
for row in data:
    if not row:
        # csv.reader yields [] for blank lines; there is no username to look up
        continue
    username = row[0]
    if username.isdigit():
        # All-digit usernames are looked up as integers
        username = int(username)
    cursor = collection['user_id_map'].find_one({'username': username})
    if cursor:
        hash_id = cursor['hash_id']
        user_id = cursor['id']
        result.append([username, user_id, hash_id] + row[1:])
    else:
        # Single parenthesised argument: valid on both Python 2 and Python 3
        print("username {0} not in collection".format(row[0]))

# Split on the LAST dot only, so dots in directory names or in the file stem
# do not break the unpacking.
input_file, extension = sys.argv[2].rsplit('.', 1)
output = CSV(result, [headers[0], 'User ID', 'User Hash ID'] + headers[1:],
             output_file=input_file + '_username_anon.' + extension)
output.generate_csv()
示例#8
0
                print(start_event_time)
                #when a start event is found set the end event to blank
                end_event_time = {'blank'}
                continue
    #assign all other event types to end
    #Note: currently seek_video events count as an end_event
            else:
                end_event_time = datetime.strptime(item['time'].split('+')[0],
                                                   "%Y-%m-%dT%H:%M:%S.%f")
                print 'end'
                print(end_event_time)
                continue
        except:
            count_errors = count_errors + 1
            errors.append([index, item])
#print watch_durations
# Console summary: a blank separator line, the number of computed durations,
# then the collected error entries.
print('')
print(len(watch_durations))
print(errors)

# Main report: one row per computed watch duration
duration_columns = [
    'Username', 'video_id', 'youtube id (video_code)', 'time_point (seconds)',
    'start_event_time', 'end_event_time', 'duration (minutes)'
]
duration_file = db_name + 'video_watch_duration.csv'
output = CSV(watch_durations, duration_columns, output_file=duration_file,
             row_limit=200000)
output.generate_csv()

# Companion report holding the entries collected in `errors`
errors_file = db_name + 'video_watch_duration_errors.csv'
output_errors = CSV(errors, ['Errors'], output_file=errors_file,
                    row_limit=200000)
output_errors.generate_csv()
示例#9
0
# Usernames come from the third column of the grade report (header skipped)
with open('csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    next(reader)
    usernames = [row[2] for row in reader]

# Navigation tab URL (or tab keyword) -> short tab name.
# NOTE: not referenced by the statements visible in this section.
NAVIGATION_TABS = {
    '/courses/McGillX/ATOC185x/2T2014/info': 'info',
    '/courses/McGillX/ATOC185x/2T2014/progress': 'progress',
    '/courses/McGillX/ATOC185x/2T2014/109d5374b52040e2a8b737cf90c5618a/': 'syllabus',
    '/courses/McGillX/ATOC185x/2T2014/441b2c519f5c464883e2ddceb26c5559/': 'maps',
    '/courses/McGillX/ATOC185x/2T2014/84f630e833eb4dbabe0a6c45c52bb443/': 'scoreboard',
    '/courses/McGillX/ATOC185x/2T2014/e75195cb39fa4e3890a613a1b3c04c7d/': 'faq',
    'courseware': 'courseware',
    'discussion': 'discussion',
    '/courses/McGillX/ATOC185x/2T2014/instructor': 'instructor',
}

# Select the navigation-tab events of the listed users
tab_event_query = {
    'username': {'$in': usernames},
    'event_type': {
        '$regex': '^/courses/McGillX/ATOC185x/2T2014/(info$|progress$|instructor$|109d5374b52040e2a8b737cf90c5618a/$|441b2c519f5c464883e2ddceb26c5559/$|84f630e833eb4dbabe0a6c45c52bb443/$|e75195cb39fa4e3890a613a1b3c04c7d/$|courseware|discussion)'
    },
}
cursor = collection['tracking_atoc185x'].find(tab_event_query)

# (date, tab) -> number of events. Any event type containing 'courseware' or
# 'discussion' is folded into that keyword ('courseware' is checked first);
# every other event type is kept verbatim.
tab_events_per_date = defaultdict(int)
for doc in cursor:
    date = datetime.strptime(doc['time'].split('T')[0], "%Y-%m-%d").date()
    tab = doc['event_type']
    for grouped_tab in ('courseware', 'discussion'):
        if grouped_tab in tab:
            tab = grouped_tab
            break
    tab_events_per_date[(date, tab)] += 1

result = [[date, tab, count]
          for (date, tab), count in tab_events_per_date.items()]
output = CSV(result, ['Date', 'Tab ID', 'Number of Events'],
             output_file='number_of_tab_events_per_date_completers.csv')
output.generate_csv()

#with open('csv_files/number_of_tab_events_per_date_completers.csv', 'w') as csv_file:
#    writer = csv.writer(csv_file)
#    writer.writerow(['Date','Tab ID','Number of Events'])
#    for date,tab in tab_events_per_date:
#        writer.writerow([date,tab, tab_events_per_date[(date,tab)] ])
示例#10
0
from datetime import datetime

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

connection = EdXConnection('student_courseenrollment')
collection = connection.get_access_to_collection()

# Can replace csv file with any csv file that contains the list of usernames
# who completed the course and achieved a certificate. Alternately, one can
# save that info in another collection in mongoDB and extract it from there.
with open('atoc185x/course_completers.csv') as csv_file:
    reader = csv.reader(csv_file)
    next(reader)  # skip the header row
    # First column -> second column; looked up below by str(user_id), and the
    # mapped value is what gets reported as the username.
    users = {row[0]: row[1] for row in reader}

# One row per completer, taken from the first enrollment document seen for
# that user id.
result = []
student_courseenrollment = collection['student_courseenrollment'].find()
seen = set()
for document in student_courseenrollment:
    user_id = document['user_id']
    if str(user_id) in users and user_id not in seen:
        seen.add(user_id)
        result.append([
            user_id,
            users[str(user_id)],
            # Keep only the part of 'created' before the first whitespace
            document['created'].split()[0],
        ])

# Each row has three values, so the header needs three names; the previous
# two-name header left the user id column without a matching title.
output = CSV(result, ['User ID', 'Username', 'Date of Registration'],
             output_file='date_of_registration_completers.csv')
output.generate_csv()
示例#11
0
        '^/courses/McGillX/ATOC185x/2T2014/(info$|progress$|instructor$|109d5374b52040e2a8b737cf90c5618a/$|441b2c519f5c464883e2ddceb26c5559/$|84f630e833eb4dbabe0a6c45c52bb443/$|e75195cb39fa4e3890a613a1b3c04c7d/$|courseware|discussion)'
    }
})
# tab -> set of usernames that produced at least one event on that tab.
# Event types containing 'courseware' or 'discussion' are grouped under that
# keyword ('courseware' wins if both appear); any other event type is used
# verbatim as the tab key.
unique_users_per_tab = defaultdict(set)
for doc in cursor:
    event_type = doc['event_type']
    if 'courseware' in event_type:
        tab_key = 'courseware'
    elif 'discussion' in event_type:
        tab_key = 'discussion'
    else:
        tab_key = event_type
    unique_users_per_tab[tab_key].add(doc['username'])

#with open('csv_files/number_of_unique_users_per_navigation_tab.csv', 'w') as csv_file:
#    writer = csv.writer(csv_file)
#    writer.writerow(['Navigation Tab', 'Number of Unique Users'])
#    for key in unique_users_per_tab:
#        writer.writerow([key, len(unique_users_per_tab[key])])
#with open('csv_files/users_per_navigation_tab.csv', 'w') as csv_file:
#    writer = csv.writer(csv_file)
#    writer.writerow(['Navigation Tab','Tab', 'Number of Unique Users'])
#    for key in unique_users_per_tab:
#        writer.writerow([key,NAVIGATION_TABS[key] ,len(unique_users_per_tab[key])])

# One row per tab key: the key itself, its name from NAVIGATION_TABS, and the
# number of distinct users collected for it.
result = [[key, NAVIGATION_TABS[key], len(tab_users)]
          for key, tab_users in unique_users_per_tab.items()]

report_columns = ['Navigation Tab', 'Tab', 'Number of Unique Users']
output = CSV(result, report_columns,
             output_file='number_of_unique_users_per_navigation_tab.csv')
output.generate_csv()
        print "Fail -> %s" % item
        fail.append(item)

# Report how many items failed; when there are any, dump them as JSON to
# report.txt for later inspection.
print("Number of fail: " + str(len(fail)))
if not fail:
    print("no fail")
else:
    import json
    with open('report.txt', 'w') as outfile:
        json.dump(fail, outfile)
def _parse_event_time(raw_time):
    """Parse an event timestamp, dropping everything from the first '+' on."""
    return datetime.strptime(raw_time.split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")


# One row per (user, session): number of events, first and last event time,
# and the span between the two.
result = []
for item in users_to_sessions:
    sessions = users_to_sessions[item]
    for nested_item in sessions:
        event_times = sessions[nested_item]
        # min/max are taken on the raw timestamp values before parsing
        end_time = _parse_event_time(max(event_times))
        start_time = _parse_event_time(min(event_times))
        time_spent = end_time - start_time
        result.append([
            item, nested_item, len(event_times), start_time, end_time,
            time_spent
        ])

report_columns = [
    'Username', 'Session ID', 'Number of Events', 'Start Time', 'End Time',
    'Time Spent'
]
output = CSV(result, report_columns, output_file='session_info.csv')
output.generate_csv()
            if item['event_type'] == 'play_video':
                start_event_time = datetime.strptime(item['time'].split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")
    #get values related to video identification
                video = item['event']
                video_code = video['code']
                video_id = video['id']
                time_point = video['currentTime']
                print 'start'
                print (start_event_time)
    #when a start event is found set the end event to blank
                end_event_time = {'blank'}
                continue
    #assign all other event types to end
    #Note: currently seek_video events count as an end_event
            else:
                end_event_time = datetime.strptime(item['time'].split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")
                print 'end'
                print (end_event_time)
                continue
        except:
            count_errors = count_errors+1
            errors.append([index,item])
#print watch_durations
# Console summary: blank line, count of computed durations, collected errors
print('')
print(len(watch_durations))
print(errors)

# Write the watch durations report
output = CSV(
    watch_durations,
    [
        'Username', 'video_id', 'youtube id (video_code)',
        'time_point (seconds)', 'start_event_time', 'end_event_time',
        'duration (minutes)'
    ],
    output_file=db_name + 'video_watch_duration.csv',
    row_limit=200000)
output.generate_csv()

# Write the entries collected in `errors` to their own report
output_errors = CSV(
    errors,
    ['Errors'],
    output_file=db_name + 'video_watch_duration_errors.csv',
    row_limit=200000)
output_errors.generate_csv()
示例#14
0
            if start_time <= time_stamp <= end_time:
                if 'sequential_display_name' in document[
                        'parent_data'] and document['parent_data'][
                            'sequential_display_name']:
                    sequential_display_name = document['parent_data'][
                        'sequential_display_name']
                elif document['metadata']['display_name']:
                    sequential_display_name = document['metadata'][
                        'display_name']
                else:
                    sequential_display_name = None
                #users_tests_events[(username, session, document['parent_data'].get('chapter_display_name', None),document['parent_data'].get('sequential_display_name', None))] += 1
                users_tests_events[(username, session,
                                    document['parent_data'].get(
                                        'chapter_display_name',
                                        None), sequential_display_name)] += 1
        except:
            print index, document
# Expand every (username, session, chapter, sequential) key into a CSV row
# followed by the navigation count stored for that key.
result = [
    [username, session, chapter_name, sequential_name, navigation_count]
    for (username, session, chapter_name, sequential_name), navigation_count
    in users_tests_events.items()
]

report_columns = [
    'Username', 'Session ID', 'Chapter Display Name',
    'Sequential Display Name', 'Navigation Count'
]
output = CSV(result, report_columns, output_file='test1_analysis.csv')
output.generate_csv()
python show_transcript_completers.py

'''

import csv

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

connection = EdXConnection('tracking_atoc185x')
collection = connection.get_access_to_collection()

# Can replace csv file with any csv file that contains the list of usernames
# who completed the course and achieved a certificate. Alternately, one can
# save that info in another collection in mongoDB and extract it from there.
with open('atoc185x/course_completers.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    next(reader)  # skip the header row
    usernames = set(row[1] for row in reader)

cursor = collection['tracking_atoc185x'].find(
    {'event_type': 'show_transcript'})

# Report every listed username once, in the order its first show_transcript
# event is encountered.
result = []
seen = set()
for document in cursor:
    username = document['username']
    if username not in usernames or username in seen:
        continue
    seen.add(username)
    result.append([username])

output = CSV(result, ['Username'],
             output_file='show_transcript_completers.csv')
output.generate_csv()
from common.generate_csv_report import CSV

connection = EdXConnection('tracking_atoc185x')
collection = connection.get_access_to_collection()

# Usernames are taken from the third column of the grade report.
with open('csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    # Skip the header row so its column label is not sent as a username in the
    # '$in' filter below; the default keeps an empty file from raising.
    next(reader, None)
    usernames = [row[2] for row in reader]

# Count the browser-side sequence navigation events (previous / goto / next)
# of the listed users, grouped by chapter, display name, event type and the
# old/new values carried by the event.
navigation_pipeline = [
    {
        "$match": {
            "event_source": "browser",
            "$or": [
                {"event_type": "seq_prev"},
                {"event_type": "seq_goto"},
                {"event_type": "seq_next"},
            ],
            'username': {'$in': usernames},
        }
    },
    {
        "$group": {
            "_id": {
                'chapter_name': "$parent_data.chapter_display_name",
                "display_name": "$metadata.display_name",
                "event_type": "$event_type",
                "event_old": "$event.old",
                "event_new": "$event.new",
            },
            "count": {"$sum": 1},
        }
    },
]
cursor = collection['tracking_atoc185x'].aggregate(navigation_pipeline)

#with open('csv_files/navigation_frequency_completers.csv', 'w') as csv_file:
#    writer = csv.writer(csv_file)
#    writer.writerow(['Chapter Name', 'Display Name', 'Event Type', 'Event Old', 'Event New', 'Count'])
#    for item in cursor['result']:
#        try:
#            writer.writerow([item['_id']['chapter_name'], item['_id']['display_name'], item['_id']['event_type'], item['_id'].get('event_old', 0), item['_id']['event_new'], item['count']])
#        except:
#           pass 

# Turn every aggregated group into a CSV row. A missing 'event_old' is
# reported as 0; a group lacking any other expected key is left out.
result = []
for item in cursor['result']:
    group_id = item['_id']
    try:
        row = [
            group_id['chapter_name'],
            group_id['display_name'],
            group_id['event_type'],
            group_id.get('event_old', 0),
            group_id['event_new'],
            item['count'],
        ]
    except KeyError:
        # Only a missing key is an expected reason to skip a group; any other
        # error is a real problem and must not be silently swallowed.
        continue
    result.append(row)

report_columns = [
    'Chapter Name', 'Display Name', 'Event Type', 'Event Old', 'Event New',
    'Count'
]
output = CSV(result, report_columns,
             output_file='navigation_frequency_completers.csv')
output.generate_csv()