Python EdXConnection示例，base_edx.EdXConnection Python示例

示例#1

0

显示文件

'''
This module calculates the number of forum threads and posts for a given course
stored in the MongoDB database

Usage:

python <path_to_script> 

'''
from base_edx import EdXConnection

# Open the 'forum' collection through the project connection helper; the
# object returned by get_access_to_collection() is indexed by collection name
connection = EdXConnection('forum')
collection = connection.get_access_to_collection()
forum = collection['forum']

# Number of documents with _type CommentThread.
# A CommentThread represents the first level of interaction: a post that opens
# a new thread, often a student question of some sort
thread_query = {'_type': 'CommentThread'}
number_of_comment_threads = forum.find(thread_query).count()

# Number of documents in the forum collection, whatever their _type
number_of_posts = forum.find().count()

# Emits "<number_of_posts> <number_of_comment_threads>" on a single line
print('%s %s' % (number_of_posts, number_of_comment_threads))

示例#2

0

显示文件

python create_problem_ids_collection.py 

Note:

Make sure the module base_edx is in the same directory as this script so that it 
can be imported

'''
from base_edx import EdXConnection

# The second argument passed to EdXConnection below is the name of the new
# collection which will contain the results of this script. Each new document
# is inserted into this new collection. The name of the resulting collection
# could be anything; preferably something relevant to the course
connection = EdXConnection('tracking_atoc185x', 'atoc185x_problem_ids')
collection = connection.get_access_to_collection()

# Only problem_check events whose event_source is the server are copied
problem_check_query = {
    'event_type': 'problem_check',
    'event_source': 'server'
}
cursor = collection['tracking_atoc185x'].find(problem_check_query)
for document in cursor:
    # Keep a reduced view of the tracking document; the whole 'event'
    # sub-document is stored alongside the problem_id pulled out of it
    doc_result = {
        'username': document['username'],
        'problem_id': document['event']['problem_id'],
        'course_id': document['context']['course_id'],
        'module': document['context']['module'],
        'time': document['time'],
        'event': document['event'],
    }
    collection['atoc185x_problem_ids'].insert(doc_result)

示例#3

0

显示文件

Usage:
python ip_to_country.py <db_name>

'''
import geoip
import csv
from collections import Counter, defaultdict
import sys

from base_edx import EdXConnection
from generate_csv_report import CSV

# The database name is the first command-line argument
db_name = sys.argv[1]

# Change name of collection as required
connection = EdXConnection(db_name, 'tracking')
collection = connection.get_access_to_collection()

# The csv file country_code_to_country.csv was retrieved from
# http://dev.maxmind.com/geoip/legacy/geolite/
# It provides the mapping of a country code to a country
with open('country_code_to_country.csv') as f:
    reader = csv.reader(f)
    # dict() needs every row to be a two-item (code, country) pair
    country_code_to_country = dict(reader)

# Collect, per username, the set of distinct ip values seen in tracking
cursor = collection['tracking'].find()
result = {}
for index, item in enumerate(cursor):
    username = item['username']
    ip = item['ip']
    result.setdefault(username, set()).add(ip)

示例#4

0

显示文件

python entrance_exit_surveys.py 

'''

from collections import defaultdict
import json
import sys

# These modules can be found under reporting scripts. You will have to add them
# in the same directory as this script
from base_edx import EdXConnection
from generate_csv_report import CSV

# The database name is the first command-line argument
db_name = sys.argv[1]

# Collections requested from the database for this script
collection_names = ('courseware_studentmodule', 'auth_user')
connection = EdXConnection(db_name, *collection_names)
collection = connection.get_access_to_collection()

# Modify key-value pairs in survey_pages to the name of the survey pages and to
# the problem ids that maps to the survey pages E.g. if a course have a

survey_pages = {
    'entrance_survey': {
        'general_info':
        'i4x://McGillX/CHEM181x/problem/c9d2efffbdf043e68789bd60cd4954e3',
        'demographics_background':
        'i4x://McGillX/CHEM181x/problem/134cfc9efb2b400bab2ee1505cc9e4a9',
        'aspirations_motivation':
        'i4x://McGillX/CHEM181x/problem/579ae070227c4f5c973eb02affdcba2a'
    },

示例#5

0

显示文件

文件： navigational_events_completers.py 项目： eduNEXT/edx_data_research

'''
This module counts the number of navigation events: seq_next, seq_prev, seq_goto
for those students who completed the course
'''

import csv

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('tracking_atoc185x')
collection = connection.get_access_to_collection()

# Third column of every row of the grade report (presumably the username
# column -- confirm against the report layout). No row is skipped, so the
# header row's third cell ends up in the list as well.
grade_report = 'csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv'
with open(grade_report, 'r') as csv_file:
    reader = csv.reader(csv_file)
    usernames = [row[2] for row in reader]

# Stage 1: browser-side sequence navigation events (seq_prev / seq_goto /
# seq_next) emitted by one of the usernames read above
match_stage = {
    "$match": {
        "event_source": "browser",
        "$or": [
            {"event_type": "seq_prev"},
            {"event_type": "seq_goto"},
            {"event_type": "seq_next"},
        ],
        'username': {'$in': usernames},
    }
}

# Stage 2: count events per (chapter, display name, event type, old, new)
group_stage = {
    "$group": {
        "_id": {
            'chapter_name': "$parent_data.chapter_display_name",
            "display_name": "$metadata.display_name",
            "event_type": "$event_type",
            "event_old": "$event.old",
            "event_new": "$event.new",
        },
        "count": {"$sum": 1},
    }
}

cursor = collection['tracking_atoc185x'].aggregate([match_stage, group_stage])

#with open('csv_files/navigation_frequency_completers.csv', 'w') as csv_file:
#    writer = csv.writer(csv_file)
#    writer.writerow(['Chapter Name', 'Display Name', 'Event Type', 'Event Old', 'Event New', 'Count'])
#    for item in cursor['result']:
#        try:
#            writer.writerow([item['_id']['chapter_name'], item['_id']['display_name'], item['_id']['event_type'], item['_id'].get('event_old', 0), item['_id']['event_new'], item['count']])
#        except:
#           pass 

result = []
for item in cursor['result']:

示例#6

0

显示文件

to their hash ids and return a new csv_report

Usage:
python user_id_to_hash_id_reports.py db_name csv_report

'''
import sys
import csv

from base_edx import EdXConnection
from generate_csv_report import CSV 

# The database name is the first command-line argument
db_name = sys.argv[1]

# Change name of collection as required
connection = EdXConnection(db_name, 'user_id_map')
collection = connection.get_access_to_collection()

# The second command-line argument is the csv report to convert. The first
# line is read off raw (unparsed) into `headers`; the remaining lines are
# parsed as csv rows.
with open(sys.argv[2]) as f:
    headers = next(f)
    reader = csv.reader(f)
    data = list(reader)

# For each row, look up the user whose numeric id is in the first column and
# insert the username and hash id right after that column.
# NOTE(review): find_one() presumably returns None for an id missing from
# user_id_map, which would raise TypeError on the subscripts below -- confirm
# whether such rows should be skipped or reported.
result = []
for row in data:
    cursor = collection['user_id_map'].find_one({'id': long(row[0])})
    hash_id = cursor['hash_id']
    username = cursor['username']
    result.append([row[0], username, hash_id] + row[1:])

# Split on the LAST dot only, so that paths containing other dots
# (e.g. '../reports/grades.csv' or 'grades.v2.csv') still unpack into exactly
# two parts; a plain split('.') raised ValueError for those.
input_file, extension = sys.argv[2].rsplit('.', 1)

示例#7

0

显示文件

文件： chapters_accessed_per_user.py 项目： eduNEXT/edx_data_research

'''
This module determines how many chapters were accessed by each user for a 
given course

Usage:

python chapters_accessed_per_user

'''
from collections import defaultdict

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('tracking', 'course_structure')
collection = connection.get_access_to_collection()

# Get all chapters: the distinct chapter display names in course_structure
chapters = collection['course_structure'].distinct(
    'parent_data.chapter_display_name')

tracking = collection['tracking'].find()
result = []
for document in tracking:
    # Placeholder: documents carrying 'parent_data' are detected but nothing
    # is recorded yet, so `result` stays empty
    if 'parent_data' in document:
        pass

# list.extend() mutates in place and returns None, so passing
# ['Username'].extend(chapters) handed None to CSV as the header row.
# Build the header list by concatenation instead.
headers = ['Username'] + list(chapters)
output = CSV(result, headers,
             output_file='atoc185x_chapters_accesses_per_user.csv')
output.generate_csv()

示例#8

0

显示文件

文件： failure_analysis.py 项目： eduNEXT/edx_data_research

from generate_csv_report import CSV

# If you have access to the grade report provided by edX, you can use the following
# 7 lines of code to get all usernames with grades between 50% and 59% inclusive
#with open('csv_files/grades_report.csv') as f:
#    reader = csv.reader(f)
#    header = reader.next()
#    usernames = [row[2] for row in reader if '0.5' <= row[3] <= '0.59']
#connection = EdXConnection('tracking')
#collection = connection.get_access_to_collection()
#cursor = collection['tracking'].aggregate([{'$match' : {'username' : {'$in' : usernames}, '$or': [{'event_type' : 'play_video'},{'event_type' : 'problem_check', 'event_source' : 'server'}]}},{'$group' : { '_id' : { "username" : "$username", "chapter_name" : "$parent_data.chapter_display_name" ,"sequential_name" : "$parent_data.sequential_display_name","vertical_name" : "$parent_data.vertical_display_name"}}}, {'$out' : 'students_50_to_59_events'}])

# Otherwise, the students with grades between 50% and 59% inclusive can be
# extracted from the collection certificates_generatedcertificate, using the
# code that follows
collection_names = ('auth_user', 'certificates_generatedcertificate')
connection = EdXConnection('tracking_atoc185x', *collection_names)
collection = connection.get_access_to_collection()

# Get all user ids of students with grades between 50% and 59% inclusive from
# the collection certificates_generatedcertificate
user_ids = {
    document['user_id']
    for document in collection['certificates_generatedcertificate'].find(
        {'$and': [{
            'grade': {
                '$gte': 0.5
            }
        }, {
            'grade': {
                '$lte': 0.59
            }

示例#9

0

显示文件

文件： date_of_registration_completers.py 项目： eduNEXT/edx_data_research

'''
This module gets the date of registration of all users who completed the course

Usage:

python date_of_registration_completers.py 

'''

import csv
from datetime import datetime

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('student_courseenrollment')
collection = connection.get_access_to_collection()

# The csv file can be replaced with any csv file that contains the list of
# users who completed the course and achieved a certificate. Alternatively,
# that info can be saved in another MongoDB collection and extracted from there
with open('atoc185x/course_completers.csv') as csv_file:
    reader = csv.reader(csv_file)
    # Discard the first row (presumably the header)
    next(reader)
    # First column -> second column; a repeated first column keeps the last row
    users = dict((row[0], row[1]) for row in reader)

result = []
student_courseenrollment = collection['student_courseenrollment'].find()
# user_ids already handled, so each user is processed at most once
seen = set()
for document in student_courseenrollment:
    if str(document['user_id']) in users and document['user_id'] not in seen:

示例#10

0

显示文件

文件： test1_analysis.py 项目： eduNEXT/edx_data_research

'''
This module gets the number of navigation events for each user while they are taking Test 1
'''
from collections import defaultdict
import time
from datetime import datetime
from generate_csv_report import CSV

from base_edx import EdXConnection

connection = EdXConnection('format_tests', 'tracking')
collection = connection.get_access_to_collection()

# Documents recorded under the chapter whose display name is 'Test 1'
test_filter = {'parent_data.chapter_display_name': 'Test 1'}
cursor = collection['format_tests'].find(test_filter)

# (username, session) -> list of the 'time' values seen for that pair
users_sessions = defaultdict(list)
for index, item in enumerate(cursor):
    session_key = (item['username'], item['session'])
    users_sessions[session_key].append(item['time'])

# Integer counters, defaulting to 0
users_tests_events = defaultdict(int)
for (username,session),times in users_sessions.iteritems():
    end_time = datetime.strptime(max(times).split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")
    start_time = datetime.strptime(min(times).split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")
    cursor = collection['tracking'].find({'username' : username, 'session' : session, '$or' : [{'event_type' : 'seq_goto'},{'event_type':'seq_prev'},{'event_type' : 'seq_next'}]})
    for index, document in enumerate(cursor):
        try:
            time_stamp = datetime.strptime(document['time'].split('+')[0], "%Y-%m-%dT%H:%M:%S.%f")
            if start_time <= time_stamp <= end_time:
                if 'sequential_display_name' in document['parent_data'] and  document['parent_data']['sequential_display_name']:
                    sequential_display_name = document['parent_data']['sequential_display_name']
                elif document['metadata']['display_name']:

示例#11

0

显示文件

'''
This module gets all the events per user while watching videos. Since we will
need to sort a very large number of documents, user should create a separate
collection to aggregate all required documents in one collection and then 
extract results from the new collection
Command to run on the mongo shell to create the new collection:

db.tracking_atoc185x.aggregate([{$match : {$and : [{"event_type" : "seek_video"},{ "parent_data": { $exists: true } }]}}, {$sort : {"parent_data.chapter_display_name" : 1, "parent_data.sequential_display_name" : 1, "parent_data.vertical_display_name" : 1}}, {$out : "seek_video"}], {allowDiskUse : true})

'''

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('seek_video')
collection = connection.get_access_to_collection()

# Ascending (direction 1) sort keys: chapter, then sequential, then vertical.
# NOTE: the find() call below does not apply these parameters.
sort_fields = ('parent_data.chapter_display_name',
               'parent_data.sequential_display_name',
               'parent_data.vertical_display_name')
sort_parameters = [(field, 1) for field in sort_fields]

cursor = collection['seek_video'].find()
result = []
for index, item in enumerate(cursor):
    if 'old_time' in item['event']:
        old_time = item['event']['old_time']
    else:
        old_time = 0
    result.append([
        item['username'], item['parent_data']['chapter_display_name'],
        item['parent_data']['sequential_display_name'],
        item['parent_data']['vertical_display_name'], old_time,
        item['event']['new_time']

示例#12

0

显示文件

文件： first_activity_completers.py 项目： eduNEXT/edx_data_research

'''
This module retrieves the first activity of every user who completed a course.
The list of users who completed the course is provided in a given csv file,
McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv. Once these users are
extracted, we query the database for each of them and detect their
first activity. First activity is defined by the event_type '/courses/McGillX/CHEM181x/1T2014/info'

'''
import csv
from datetime import datetime
from collections import defaultdict

from base_edx import EdXConnection

# Connect to MongoDB and get access to the tracking collections
connection = EdXConnection('tracking', 'tracking_before_jan22')
collection = connection.get_access_to_collection()

# Retrieve the users who completed the course. This can be done in any way,
# depending on what is provided; here the third column of every row of the
# grade report is used (no row is skipped, so the header row is included)
grade_report = 'csv_files/McGillX_CHEM181x_1T2014_grade_report_2014-04-24-1030.csv'
with open(grade_report, 'r') as csv_file:
    reader = csv.reader(csv_file)
    usernames = []
    for row in reader:
        usernames.append(row[2])

# Retrieve the time of the first activity of all users who completed the course
time_events = defaultdict(list)
first_activity_filter = {'event_type': '/courses/McGillX/CHEM181x/1T2014/info'}
cursor = collection['tracking'].find(first_activity_filter)
# The same filter could be run against collection['tracking_before_jan22'];
# that collection is requested above but is not queried here
with open('csv_files/first_activity_completers.csv', 'w') as csv_file:

示例#13

0

显示文件

username, video associated with load_video event, parent_data: {chapter_display_name, sequential_display_name, vertical_display_name,}, edx_video_id, video watch segments

get the event_types : load_video, play_video, pause_video, seek_video

sort by "time": "" so that the events are chronologically ordered

for each load_video new video watch segment should include ONLY:

- time between play_video -> next video event in time (pause_video or seek_video)
- time between seek_video : {'new_time' : Time}  -> pause video (only with new_time > old_time, this is to avoid including rewinds)

watch periods:
    event_type : pause_video - "event_type":"play_video" {"event":{"currentTime":TIME}} = new video watch segment
    if seek_video : {'old_time' : TIME} < seek_video : {'new_time' : TIME} 
      "pause_video" {"event":{"currentTime":TIME}} - seek_video : {'new_time': TIME } = new video watch segment

    if seek_video : {'old_time' : TIME} > seek_video : {'new_time' : TIME} = rewind (exclude from watch segments)
'''

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('video_watch_duration_collection')
collection = connection.get_access_to_collection()
cursor = collection['video_watch_duration_collection'].find()

# Nothing is appended to result here: the report is generated with the header
# row as its only content, and the cursor above is never consumed
result = []

headers = ['Username']
output = CSV(result, headers, output_file='video_watch_duration.csv',
             row_limit=200000)
output.generate_csv()

示例#14

0

显示文件

文件： map_answer_key_to_value.py 项目： XUHAOLIANG2014/edx_data_research

import csv
import sys
import json

# These modules can be found under reporting scripts. You will have to add them
# in the same directory as this script
from base_edx import EdXConnection
from generate_csv_report import CSV

# Command-line arguments: database name, then path of the answer
# distribution csv file
db_name, answer_distribution = sys.argv[1], sys.argv[2]
connection = EdXConnection(db_name, 'tracking')
collection = connection.get_access_to_collection()

# First row goes to `headers`; every remaining row is kept in `rows`
with open(answer_distribution) as f:
    csv_f = csv.reader(f)
    headers = next(csv_f)
    rows = list(csv_f)

answer_keys = {}

for row in rows:
    problem_id = row[1]
    value = row[4]
    answer_value = row[5]
    if value:
        value = value.replace('[', '').replace(']', '').split('|')
        answer_value = answer_value.replace('[', '').replace(']', '').split('|')
        dict_value_answer = dict(zip(value, answer_value))
        try:
            answer_keys[problem_id].update(dict_value_answer)

示例#15

0

显示文件

文件： activities_with_lower_completion.py 项目： eduNEXT/edx_data_research

db.tracking_atoc185x.aggregate([{$match : {event_type : 'problem_check', 'event_source': 'server'}}, {$group : {_id : {"username" : "$username", "problem_id" : "$event.problem_id"}, attempts : {$push : "$event.success"}}}, {$out : "user_attempts_per_problem_id"}])

Then run this script on the above collection

Usage:

python activities_with_lower_completion.py

'''
from collections import defaultdict

from base_edx import EdXConnection
from generate_csv_report import CSV

# Connect to MongoDB and get access to the user_attempts_per_problem_id
# collection
connection = EdXConnection('user_attempts_per_problem_id')
collection = connection.get_access_to_collection()

cursor = collection['user_attempts_per_problem_id'].find()

# problem_id -> {'correct': count, 'incorrect': count}, counters default to 0
result = defaultdict(lambda: defaultdict(int))
for index, document in enumerate(cursor):
    # Each document is counted exactly once: as correct when any of its
    # attempts is 'correct', otherwise as incorrect
    outcome = 'correct' if 'correct' in document['attempts'] else 'incorrect'
    result[document['_id']['problem_id']][outcome] += 1

# One row per problem id: [problem_id, correct count, incorrect count]
csv_result = []
for problem_id in result:
    counts = result[problem_id]
    csv_result.append([problem_id, counts['correct'], counts['incorrect']])
output = CSV(csv_result, ['Problem Id', 'Correct Count', 'Incorrect Count'],

示例#16

0

显示文件

文件： user_info.py 项目： eduNEXT/edx_data_research

'''
This module will retrieve info about students registered in the course

Usage:

python user_info.py

'''

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('certificates_generatedcertificate',
                           'auth_userprofile')
collection = connection.get_access_to_collection()
documents = collection['auth_userprofile'].find()

# One row per user profile that has a matching certificate document:
# [user_id, name, grade, gender, year_of_birth, level_of_education,
#  country, city]
result = []
for document in documents:
    user_id = document['user_id']
    certificate = collection['certificates_generatedcertificate'].find_one(
        {'user_id': user_id})
    if certificate is None:
        # Handle users with no grades: no certificate document, skip them
        continue
    try:
        row = [
            user_id, document['name'], certificate['grade'],
            document['gender'], document['year_of_birth'],
            document['level_of_education'], document['country'],
            document['city']
        ]
    except KeyError:
        # A missing grade or profile field still skips the user, as before,
        # but only KeyError is swallowed: database errors and
        # KeyboardInterrupt are no longer hidden by a bare except
        continue
    result.append(row)

示例#17

0

显示文件

Since we will need to sort a very large number of documents, you should create a separate collection to 
aggregate all required documents in one collection and then extract results from the new collection.

Command to run on the mongo shell to create new collection:

db.tracking_atoc185x.aggregate([{$match : {$and : [{"event_type" : "speed_change_video"},{ "parent_data": { $exists: true } }]}}, {$sort : {"parent_data.chapter_display_name" : 1, "parent_data.sequential_display_name" : 1, "parent_data.vertical_display_name" : 1}}, {$out : "speed_change_video_data"}], {allowDiskUse : true})

Usage: 
python speed_change_video.py

'''

from base_edx import EdXConnection
from generate_csv_report import CSV

connection = EdXConnection('speed_change_video_data')
collection = connection.get_access_to_collection()
cursor = collection['speed_change_video_data'].find()

# One row per speed-change document: who changed the speed, where in the
# course (chapter / sequential / vertical), and the old and new speeds
result = []
for item in cursor:
    username = item['username']
    parent_data = item['parent_data']
    chapter_name = parent_data['chapter_display_name']
    sequential_name = parent_data['sequential_display_name']
    vertical_name = parent_data['vertical_display_name']
    event = item['event']
    result.append([
        username, chapter_name, sequential_name, vertical_name,
        event['old_speed'], event['new_speed']
    ])

headers = [
    'Username', 'Chapter Name', 'Sequential Name', 'Vertical Name',
    'Old Speed', 'New Speed'
]
output = CSV(result, headers, output_file='speed_change.csv')
output.generate_csv()