Then run this script on the above collection Usage: python activities_with_lower_completion.py ''' from collections import defaultdict from common.base_edx import EdXConnection from common.generate_csv_report import CSV # Connect to MongoDB and extra the tracking collection connection = EdXConnection('user_attempts_per_problem_id') collection = connection.get_access_to_collection() cursor = collection['user_attempts_per_problem_id'].find() result = defaultdict(lambda: defaultdict(int)) for index,document in enumerate(cursor): # If there is a correct attempts, accept as answered correctly, else accept #as incorrect only once per student per problem id if 'correct' in document['attempts']: result[document['_id']['problem_id']]['correct'] += 1 else: result[document['_id']['problem_id']]['incorrect'] += 1 csv_result = [[item, result[item]['correct'], result[item]['incorrect']] for item in result] output = CSV(csv_result, ['Problem Id', 'Correct Count', 'Incorrect Count'], output_file='activities_with_lower_completion.csv') output.generate_csv()
# Build one report row per problem_check document:
# [hash id, username, attempt number, module name, time, success, grade,
#  max grade, answer_1, answer_2, ...]
result = []
for document in cursor:
    event = document['event']
    # Order the answer columns by the integer found in the second-to-last
    # '_'-separated field of each correct_map key.
    ordered_keys = sorted(event['correct_map'].keys(),
                          key=lambda x: int(x.split('_')[-2]))
    answers = []
    for key in ordered_keys:
        try:
            answers.append(event['submission'][key]['answer'])
        except KeyError:
            # No submission (or no answer) recorded for this part:
            # keep the column but leave the cell blank.
            answers.append('')
    result.append([
        document['hash_id'],
        document['username'],
        event['attempts'],
        document['module']['display_name'],
        document['time'],
        event['success'],
        event['grade'],
        event['max_grade'],
    ] + answers)

if final_attempts:
    # Keep a single row per student (grouped on hash id, column 0): the row
    # with the highest attempt number, which is column 2. The previous key
    # used column 1 (the username), which is the same for every row in a
    # group, so max() just returned the first row instead of the last attempt.
    result = [
        max(items, key=lambda x: x[2])
        for _hash_id, items in groupby(sorted(result, key=lambda x: x[0]),
                                       lambda x: x[0])
    ]

csv_report_name = _generate_name_from_problem_id(problem_id, display_name)
output = CSV(result, [
    'Hash ID', 'Username', 'Attempt Number', 'Module', 'Time', 'Success',
    'Grade Achieved', 'Max Grade'
] + problem_ids, output_file=csv_report_name)
output.generate_csv()
Usage: python activities_with_lower_completion.py ''' from collections import defaultdict from common.base_edx import EdXConnection from common.generate_csv_report import CSV # Connect to MongoDB and extra the tracking collection connection = EdXConnection('user_attempts_per_problem_id') collection = connection.get_access_to_collection() cursor = collection['user_attempts_per_problem_id'].find() result = defaultdict(lambda: defaultdict(int)) for index, document in enumerate(cursor): # If there is a correct attempts, accept as answered correctly, else accept #as incorrect only once per student per problem id if 'correct' in document['attempts']: result[document['_id']['problem_id']]['correct'] += 1 else: result[document['_id']['problem_id']]['incorrect'] += 1 csv_result = [[item, result[item]['correct'], result[item]['incorrect']] for item in result] output = CSV(csv_result, ['Problem Id', 'Correct Count', 'Incorrect Count'], output_file='activities_with_lower_completion.csv') output.generate_csv()
'$match': { 'username': { '$in': usernames }, '$or': [{ 'event_type': 'play_video' }, { 'event_type': 'problem_check', 'event_source': 'server' }] } }, { '$group': { '_id': { "username": "******", "chapter_name": "$parent_data.chapter_display_name", "sequential_name": "$parent_data.sequential_display_name", "vertical_name": "$parent_data.vertical_display_name" } } }]) #, {'$out' : 'students_50_to_59_events'}]) result = [[ document['_id']['username'], document['_id']['chapter_name'], document['_id']['sequential_name'], document['_id']['vertical_name'] ] for document in cursor['result'] if 'chapter_name' in document['_id']] output = CSV(result, ['Username', 'Chapter Name', 'Sequential Name', 'Vertical Name'], output_file='failure_analysis_50_to_59.csv') output.generate_csv()
from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

connection = EdXConnection('seek_video')
collection = connection.get_access_to_collection()

# NOTE(review): sort_parameters is defined but never passed to the query
# below, so rows come back in whatever order find() yields them - confirm
# whether the collection is expected to be pre-sorted.
sort_parameters = [
    ('parent_data.chapter_display_name', 1),
    ('parent_data.sequential_display_name', 1),
    ('parent_data.vertical_display_name', 1),
]
cursor = collection['seek_video'].find()

# One row per seek_video document; events without an 'old_time' report 0.
result = []
for item in cursor:
    event = item['event']
    location = item['parent_data']
    old_time = event['old_time'] if 'old_time' in event else 0
    result.append([
        item['username'],
        location['chapter_display_name'],
        location['sequential_display_name'],
        location['vertical_display_name'],
        old_time,
        event['new_time'],
    ])

output = CSV(result, [
    'Username', 'Chapter Name', 'Sequential Name', 'Vertical Name',
    'Old Time', 'New Time'
], output_file='seek_video.csv', row_limit=200000)
output.generate_csv()
Since we will need to sort a very large number of documents, you should create a separate collection to aggregate all required documents in one collection and then extract results from the new collection. Command to run on the mongo shell to create new collection: db.tracking_atoc185x.aggregate([{$match : {$and : [{"event_type" : "speed_change_video"},{ "parent_data": { $exists: true } }]}}, {$sort : {"parent_data.chapter_display_name" : 1, "parent_data.sequential_display_name" : 1, "parent_data.vertical_display_name" : 1}}, {$out : "speed_change_video_data"}], {allowDiskUse : true}) Usage: python speed_change_video.py ''' from common.base_edx import EdXConnection from common.generate_csv_report import CSV connection = EdXConnection('speed_change_video_data') collection = connection.get_access_to_collection() cursor = collection['speed_change_video_data'].find() result = [[ item['username'], item['parent_data']['chapter_display_name'], item['parent_data']['sequential_display_name'], item['parent_data']['vertical_display_name'], item['event']['old_speed'], item['event']['new_speed'] ] for item in cursor] output = CSV(result, [ 'Username', 'Chapter Name', 'Sequential Name', 'Vertical Name', 'Old Speed', 'New Speed' ], output_file='speed_change.csv') output.generate_csv()
import os

# argv[1]: database name; argv[2]: path of the input CSV whose first column
# holds usernames.
db_name = sys.argv[1]

# Change name of collection as required
connection = EdXConnection(db_name, 'user_id_map')
collection = connection.get_access_to_collection()

with open(sys.argv[2]) as f:
    reader = csv.reader(f)
    # Parse the header with the csv module as well, so the last column name
    # does not keep the trailing newline and quoted names are split correctly.
    headers = next(reader)
    # csv.reader yields [] for blank lines; drop them so row[0] is safe.
    data = [row for row in reader if row]

# Output rows: [username, user id, hash id] followed by the remaining
# columns of the input row. Usernames missing from user_id_map are reported
# on stdout and left out of the output.
result = []
for row in data:
    username = row[0]
    # presumably all-digit usernames are stored as integers in user_id_map -
    # verify against the collection
    if username.isdigit():
        username = int(username)
    cursor = collection['user_id_map'].find_one({'username': username})
    if cursor:
        result.append([username, cursor['id'], cursor['hash_id']] + row[1:])
    else:
        print("username {0} not in collection".format(row[0]))

# splitext copes with paths that contain extra dots (./data/users.v2.csv) or
# no extension at all; extension keeps its leading dot.
input_file, extension = os.path.splitext(sys.argv[2])
output = CSV(result,
             [headers[0], 'User ID', 'User Hash ID'] + headers[1:],
             output_file=input_file + '_username_anon' + extension)
output.generate_csv()
print(start_event_time) #when a start event is found set the end event to blank end_event_time = {'blank'} continue #assign all other event types to end #Note: currently seek_video events count as an end_event else: end_event_time = datetime.strptime(item['time'].split('+')[0], "%Y-%m-%dT%H:%M:%S.%f") print 'end' print(end_event_time) continue except: count_errors = count_errors + 1 errors.append([index, item]) #print watch_durations print print len(watch_durations) print errors output = CSV(watch_durations, [ 'Username', 'video_id', 'youtube id (video_code)', 'time_point (seconds)', 'start_event_time', 'end_event_time', 'duration (minutes)' ], output_file=db_name + 'video_watch_duration.csv', row_limit=200000) output.generate_csv() output_errors = CSV(errors, ['Errors'], output_file=db_name + 'video_watch_duration_errors.csv', row_limit=200000) output_errors.generate_csv()
# Usernames are taken from the third column of the grade report; the first
# row is skipped as a header.
with open('csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    next(reader)
    usernames = [row[2] for row in reader]

NAVIGATION_TABS = {
    '/courses/McGillX/ATOC185x/2T2014/info' : 'info',
    '/courses/McGillX/ATOC185x/2T2014/progress' : 'progress',
    '/courses/McGillX/ATOC185x/2T2014/109d5374b52040e2a8b737cf90c5618a/' : 'syllabus',
    '/courses/McGillX/ATOC185x/2T2014/441b2c519f5c464883e2ddceb26c5559/' : 'maps',
    '/courses/McGillX/ATOC185x/2T2014/84f630e833eb4dbabe0a6c45c52bb443/' : 'scoreboard',
    '/courses/McGillX/ATOC185x/2T2014/e75195cb39fa4e3890a613a1b3c04c7d/' : 'faq',
    'courseware' : 'courseware',
    'discussion': 'discussion',
    '/courses/McGillX/ATOC185x/2T2014/instructor' : 'instructor',
}

# Page-view events of the selected users whose event_type is one of the
# navigation tab URLs (courseware/discussion match as URL prefixes).
cursor = collection['tracking_atoc185x'].find({
    'username' : {'$in' : usernames},
    'event_type' : {
        '$regex' : '^/courses/McGillX/ATOC185x/2T2014/(info$|progress$|instructor$|109d5374b52040e2a8b737cf90c5618a/$|441b2c519f5c464883e2ddceb26c5559/$|84f630e833eb4dbabe0a6c45c52bb443/$|e75195cb39fa4e3890a613a1b3c04c7d/$|courseware|discussion)'
    },
})

# (date, tab) -> number of events
tab_events_per_date = defaultdict(int)
for doc in cursor:
    day = datetime.strptime(doc['time'].split('T')[0], "%Y-%m-%d").date()
    event_type = doc['event_type']
    # Any courseware/discussion URL is folded into a single tab id; the
    # other tabs are keyed by their full URL.
    if 'courseware' in event_type:
        tab = 'courseware'
    elif 'discussion' in event_type:
        tab = 'discussion'
    else:
        tab = event_type
    tab_events_per_date[(day, tab)] += 1

result = [[day, tab, count] for (day, tab), count in tab_events_per_date.items()]
output = CSV(result, ['Date','Tab ID','Number of Events'], output_file='number_of_tab_events_per_date_completers.csv')
output.generate_csv()

#with open('csv_files/number_of_tab_events_per_date_completers.csv', 'w') as csv_file:
#    writer = csv.writer(csv_file)
#    writer.writerow(['Date','Tab ID','Number of Events'])
#    for date,tab in tab_events_per_date:
#        writer.writerow([date,tab, tab_events_per_date[(date,tab)] ])
from datetime import datetime

from common.base_edx import EdXConnection
from common.generate_csv_report import CSV

connection = EdXConnection('student_courseenrollment')
collection = connection.get_access_to_collection()

# Can replace csv file with any csv file that contains the list of usernames
# who completed the course and achieved a certificate. Alternately, one can
# save that info in another collection in mongoDB and extract it from the
# collection.
# The first row is skipped as a header; column 0 is matched against the
# enrollment user_id (as a string) and column 1 is written out as the username.
with open('atoc185x/course_completers.csv') as csv_file:
    reader = csv.reader(csv_file)
    next(reader)
    users = {row[0]: row[1] for row in reader}

result = []
student_courseenrollment = collection['student_courseenrollment'].find()
seen = set()  # user ids already reported, so each completer appears once
for document in student_courseenrollment:
    user_id = document['user_id']
    user_key = str(user_id)
    if user_key in users and user_id not in seen:
        seen.add(user_id)
        # 'created' is split on whitespace and only the first part kept
        # (presumably the date portion of a "date time" string).
        result.append([user_id, users[user_key], document['created'].split()[0]])

# Each row has three values, so three column names are required; the old
# two-name header labelled the user id as 'Username' and the username as
# 'Date of Registration'.
output = CSV(result, ['User ID', 'Username', 'Date of Registration'],
             output_file='date_of_registration_completers.csv')
output.generate_csv()
'^/courses/McGillX/ATOC185x/2T2014/(info$|progress$|instructor$|109d5374b52040e2a8b737cf90c5618a/$|441b2c519f5c464883e2ddceb26c5559/$|84f630e833eb4dbabe0a6c45c52bb443/$|e75195cb39fa4e3890a613a1b3c04c7d/$|courseware|discussion)' } }) unique_users_per_tab = defaultdict(set) for doc in cursor: if 'courseware' in doc['event_type']: unique_users_per_tab['courseware'].add(doc['username']) elif 'discussion' in doc['event_type']: unique_users_per_tab['discussion'].add(doc['username']) else: unique_users_per_tab[doc['event_type']].add(doc['username']) #with open('csv_files/number_of_unique_users_per_navigation_tab.csv', 'w') as csv_file: # writer = csv.writer(csv_file) # writer.writerow(['Navigation Tab', 'Number of Unique Users']) # for key in unique_users_per_tab: # writer.writerow([key, len(unique_users_per_tab[key])]) #with open('csv_files/users_per_navigation_tab.csv', 'w') as csv_file: # writer = csv.writer(csv_file) # writer.writerow(['Navigation Tab','Tab', 'Number of Unique Users']) # for key in unique_users_per_tab: # writer.writerow([key,NAVIGATION_TABS[key] ,len(unique_users_per_tab[key])]) result = [] for key in unique_users_per_tab: result.append([key, NAVIGATION_TABS[key], len(unique_users_per_tab[key])]) output = CSV(result, ['Navigation Tab', 'Tab', 'Number of Unique Users'], output_file='number_of_unique_users_per_navigation_tab.csv') output.generate_csv()
print "Fail -> %s" % item fail.append(item) print "Number of fail: " + str(len(fail)) if fail: import json with open('report.txt', 'w') as outfile: json.dump(fail, outfile) else: print "no fail" result = [] for item in users_to_sessions: for nested_item in users_to_sessions[item]: max_time = max(users_to_sessions[item][nested_item]) end_time = datetime.strptime( max_time.split('+')[0], "%Y-%m-%dT%H:%M:%S.%f") min_time = min(users_to_sessions[item][nested_item]) start_time = datetime.strptime( min_time.split('+')[0], "%Y-%m-%dT%H:%M:%S.%f") result.append([ item, nested_item, len(users_to_sessions[item][nested_item]), start_time, end_time, end_time - start_time ]) output = CSV(result, [ 'Username', 'Session ID', 'Number of Events', 'Start Time', 'End Time', 'Time Spent' ], output_file='session_info.csv') output.generate_csv()
if item['event_type'] == 'play_video': start_event_time = datetime.strptime(item['time'].split('+')[0], "%Y-%m-%dT%H:%M:%S.%f") #get values related to video identification video = item['event'] video_code = video['code'] video_id = video['id'] time_point = video['currentTime'] print 'start' print (start_event_time) #when a start event is found set the end event to blank end_event_time = {'blank'} continue #assign all other event types to end #Note: currently seek_video events count as an end_event else: end_event_time = datetime.strptime(item['time'].split('+')[0], "%Y-%m-%dT%H:%M:%S.%f") print 'end' print (end_event_time) continue except: count_errors = count_errors+1 errors.append([index,item]) #print watch_durations print print len(watch_durations) print errors output = CSV(watch_durations, ['Username','video_id','youtube id (video_code)','time_point (seconds)','start_event_time','end_event_time', 'duration (minutes)'], output_file=db_name+'video_watch_duration.csv', row_limit=200000) output.generate_csv() output_errors = CSV(errors, ['Errors'], output_file=db_name+'video_watch_duration_errors.csv', row_limit=200000) output_errors.generate_csv()
if start_time <= time_stamp <= end_time: if 'sequential_display_name' in document[ 'parent_data'] and document['parent_data'][ 'sequential_display_name']: sequential_display_name = document['parent_data'][ 'sequential_display_name'] elif document['metadata']['display_name']: sequential_display_name = document['metadata'][ 'display_name'] else: sequential_display_name = None #users_tests_events[(username, session, document['parent_data'].get('chapter_display_name', None),document['parent_data'].get('sequential_display_name', None))] += 1 users_tests_events[(username, session, document['parent_data'].get( 'chapter_display_name', None), sequential_display_name)] += 1 except: print index, document result = [] for (username, session, chapter_name, sequential_name) in users_tests_events: result.append([ username, session, chapter_name, sequential_name, users_tests_events[(username, session, chapter_name, sequential_name)] ]) output = CSV(result, [ 'Username', 'Session ID', 'Chapter Display Name', 'Sequential Display Name', 'Navigation Count' ], output_file='test1_analysis.csv') output.generate_csv()
python show_transcript_completers.py ''' import csv from common.base_edx import EdXConnection from common.generate_csv_report import CSV connection = EdXConnection('tracking_atoc185x') collection = connection.get_access_to_collection() # Can replace csv file with any csv file that contains the list of usernames # who completed the course and achieved a certificate. Alternately, one can # save that info in another collection in mongoDB and extra it from the collection with open('atoc185x/course_completers.csv', 'r') as csv_file: reader = csv.reader(csv_file) reader.next() usernames = {row[1] for row in reader} cursor = collection['tracking_atoc185x'].find({'event_type' : 'show_transcript'}) result = [] seen = set() for document in cursor: if document['username'] in usernames and document['username'] not in seen: seen.add(document['username']) result.append([document['username']]) output = CSV(result, ['Username'], output_file='show_transcript_completers.csv') output.generate_csv()
from common.generate_csv_report import CSV

connection = EdXConnection('tracking_atoc185x')
collection = connection.get_access_to_collection()

# Usernames come from the third column of the grade report.
with open('csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv', 'r') as csv_file:
    reader = csv.reader(csv_file)
    # Skip the header row so its label is not sent to MongoDB as a username;
    # the None default keeps an empty file from raising StopIteration.
    next(reader, None)
    usernames = [row[2] for row in reader]

# Count browser sequence-navigation events (prev / goto / next) of the
# selected users, grouped by chapter, display name, event type and the
# old/new position carried by the event.
cursor = collection['tracking_atoc185x'].aggregate([
    {"$match" : {
        "event_source" : "browser",
        "$or" : [{"event_type" : "seq_prev"}, {"event_type" : "seq_goto"}, {"event_type" : "seq_next"}],
        'username' : {'$in' : usernames},
    }},
    {"$group" : {
        "_id" : {
            'chapter_name' : "$parent_data.chapter_display_name",
            "display_name" : "$metadata.display_name",
            "event_type" : "$event_type",
            "event_old" : "$event.old",
            "event_new" : "$event.new",
        },
        "count" : {"$sum" : 1},
    }},
])

result = []
# NOTE(review): indexing the aggregate() return value with ['result'] is kept
# from the original - confirm it matches the installed pymongo version.
for item in cursor['result']:
    group = item['_id']
    try:
        row = [
            group['chapter_name'],
            group['display_name'],
            group['event_type'],
            group.get('event_old', 0),
            group['event_new'],
            item['count'],
        ]
    except KeyError:
        # Best effort: a group missing one of the required keys is left out
        # of the report. Only KeyError is skipped, so any other failure
        # surfaces instead of being swallowed.
        continue
    result.append(row)

output = CSV(result, ['Chapter Name', 'Display Name', 'Event Type', 'Event Old', 'Event New', 'Count'], output_file='navigation_frequency_completers.csv')
output.generate_csv()