예제 #1
0
def TransformResourceData(vars):
    """Copy all source resources into the target ``resources`` table.

    Resources are fetched from ``vars['queries']`` one type at a time
    (indices, content sections, tutorials, tests, books, wikis, forums).
    Every item receives a new sequential ``resource_id`` starting at 1, and
    the parent reference of tutorials and tests is translated through the ids
    already assigned to content sections.  Items are modified in place
    (``resource_id`` and ``resource_parent_id`` are set on them).

    Args:
        vars: Pipeline context.  Reads ``vars['target']`` (keys ``host``,
            ``user``, ``password``, ``port``, ``db``), ``vars['queries']``
            and ``vars['logger']``.

    Returns:
        dict mapping resource type name -> {original id -> new resource id}.

    Raises:
        KeyError: if an item lacks ``original_id``,
            ``resource_parent_original_id`` or any column listed in
            ``fields``.
    """
    fields = {
        'resource_id': 'num',
        'resource_type_id': 'num',
        'resource_name': 'string',
        'resource_uri': 'string',
        'resource_parent_id': 'num',
        'resource_child_number': 'num',
    }

    t = vars['target']
    resource_inserter = db.StaggeredInsert(t['host'], t['user'], t['password'], t['port'], t['db'], 'resources', fields)

    resource_id_map = {}
    # A parent type must be listed before the types that point at it, so that
    # its id map is already filled when the children are processed.
    resources = [
        {'type': 'indices', 'parent_type': None, 'items': vars['queries'].GetIndices(vars)},
        {'type': 'content_sections', 'parent_type': None, 'items': vars['queries'].GetContentSections(vars)},
        {'type': 'tutorials', 'parent_type': 'content_sections', 'items': vars['queries'].GetTutorials(vars)},
        {'type': 'tests', 'parent_type': 'content_sections', 'items': vars['queries'].GetTests(vars)},
        {'type': 'books', 'parent_type': None, 'items': vars['queries'].GetBooks(vars)},
        {'type': 'wikis', 'parent_type': None, 'items': vars['queries'].GetWikis(vars)},
        {'type': 'forums', 'parent_type': None, 'items': vars['queries'].GetForums(vars)},
    ]

    resource_moocdb_id = 1

    for resource_subset in resources:
        type_map = resource_id_map.setdefault(resource_subset['type'], {})
        parent_type = resource_subset['parent_type']

        for item in resource_subset['items']:
            item['resource_id'] = resource_moocdb_id
            type_map[item['original_id']] = resource_moocdb_id
            resource_moocdb_id += 1

            parent_original_id = item['resource_parent_original_id']

            # Direct dict lookup instead of scanning .keys(); an unknown or
            # absent parent leaves resource_parent_id as None.
            parent_id = None
            if parent_type is not None and parent_original_id is not None:
                parent_id = resource_id_map[parent_type].get(parent_original_id)
            item['resource_parent_id'] = parent_id

            resource_inserter.addRow({k: item[k] for k in fields})

    # Flush whatever rows the inserter is still holding.
    resource_inserter.insertPendingRows()

    vars["logger"].Log(vars, "Counts: Inserted {} resources to target".format(resource_inserter.num_inserted_rows))

    return resource_id_map
예제 #2
0
파일: tests.py 프로젝트: EDUlib/eTracesX
def InsertAssessments(vars, assessments):
    """Write assessment records to the target ``assessments`` table.

    Args:
        vars: Pipeline context; only ``vars['target']`` (keys ``host``,
            ``user``, ``password``, ``port``, ``db``) is read.
        assessments: Iterable of mappings, each providing every column
            listed in ``column_types`` below.

    Returns:
        None.
    """
    column_types = {
        'assessment_id': 'num',
        'submission_id': 'num',
        'assessment_grade': 'num',
        'assessment_max_grade': 'num',
        'assessment_grader_id': 'num',
        'assessment_timestamp': 'datetime',
    }

    target = vars['target']
    inserter = db.StaggeredInsert(
        target['host'],
        target['user'],
        target['password'],
        target['port'],
        target['db'],
        'assessments',
        column_types,
    )

    for record in assessments:
        # Keep only the table columns, dropping any extra keys on the record.
        row = {}
        for column in column_types:
            row[column] = record[column]
        inserter.addRow(row)

    # Flush the rows still held by the inserter.
    inserter.insertPendingRows()
예제 #3
0
파일: tests.py 프로젝트: EDUlib/eTracesX
def InsertSubmissions(vars, submissions):
    """Write submission records to the target ``submissions`` table.

    Args:
        vars: Pipeline context; only ``vars['target']`` (keys ``host``,
            ``user``, ``password``, ``port``, ``db``) is read.
        submissions: Iterable of mappings, each providing every column
            listed in ``column_types`` below.

    Returns:
        None.
    """
    column_types = {
        'submission_id': 'num',
        'user_id': 'num',
        'problem_id': 'num',
        'submission_timestamp': 'datetime',
        'submission_answer': 'string',
        'submission_attempt_number': 'num',
        'submission_is_submitted': 'num',
    }

    target = vars['target']
    inserter = db.StaggeredInsert(
        target['host'],
        target['user'],
        target['password'],
        target['port'],
        target['db'],
        'submissions',
        column_types,
    )

    for record in submissions:
        # Project the record onto the table columns before queueing it.
        row = dict((column, record[column]) for column in column_types)
        inserter.addRow(row)

    # Flush the rows still held by the inserter.
    inserter.insertPendingRows()
예제 #4
0
def InsertObservedEvents(vars, events):
    """Write observed-event records to the target ``observed_events`` table.

    Args:
        vars: Pipeline context; only ``vars['target']`` (keys ``host``,
            ``user``, ``password``, ``port``, ``db``) is read.
        events: Iterable of mappings, each providing every column listed in
            ``fields`` below.

    Returns:
        None.
    """
    fields = {
        'observed_event_type_id': 'num',
        'user_id': 'num',
        'item_id': 'num',
        'observed_event_timestamp': 'datetime',
        'observed_event_data': 'string',
    }

    t = vars['target']
    # The db.Selector that used to be built here was never used, so it is no
    # longer constructed on every call.
    oe_inserter = db.StaggeredInsert(t['host'], t['user'], t['password'],
                                     t['port'], t['db'], 'observed_events',
                                     fields)
    for event in events:
        oe_inserter.addRow({k: event[k] for k in fields})

    # Flush the rows still held by the inserter.
    oe_inserter.insertPendingRows()
예제 #5
0
파일: tests.py 프로젝트: EDUlib/eTracesX
def TransformTestData(vars):
    """Populate the target problems, submissions, assessments and events.

    First inserts every problem returned by ``vars['queries'].GetProblems``
    with a new sequential ``problem_id``.  Then, for each test returned by
    ``GetTests``, builds the submission, assessment and ``problem_submission``
    observed-event rows and hands them to ``InsertSubmissions``,
    ``InsertAssessments`` and ``InsertObservedEvents``.  Submission and
    assessment ids are sequential across all tests, starting at 1.

    Args:
        vars: Pipeline context.  Reads ``vars['target']``, ``vars['queries']``,
            ``vars['logger']`` and ``vars['id_maps']`` (``'tests'``,
            ``'users'`` and ``'user'`` entries).

    Returns:
        ``{'problems': {original problem id -> new problem id}}``.

    Raises:
        KeyError: if a referenced original id is missing from the
            corresponding id map.
    """
    test_id_maps = {}

    # Problems
    # ---------

    fields = {
        'problem_id': 'num',
        'problem_name': 'string',
        'problem_type_id': 'num',
        'problem_parent_id': 'num',
        'resource_id': 'num',
        'problem_child_number': 'num',
        'problem_release_timestamp': 'datetime',
        'problem_soft_deadline': 'datetime',
        'problem_hard_deadline': 'datetime',
        'problem_max_submission': 'num',
    }

    t = vars['target']
    problem_inserter = db.StaggeredInsert(t['host'], t['user'], t['password'], t['port'], t['db'], 'problems', fields)
    problem_id_map = {}

    problems = vars['queries'].GetProblems(vars)
    for problem_index, problem in enumerate(problems, 1):
        problem['problem_id'] = problem_index
        problem_id_map[problem['problem_original_id']] = problem_index

        # Both the parent and the resource reference are translated through
        # the 'tests' id map; an absent reference stays None.
        problem['problem_parent_id'] = None
        if problem['problem_parent_original_id'] is not None:
            problem['problem_parent_id'] = vars['id_maps']['tests'][problem['problem_parent_original_id']]

        problem['resource_id'] = None
        if problem['resource_original_id'] is not None:
            problem['resource_id'] = vars['id_maps']['tests'][problem['resource_original_id']]
        problem_inserter.addRow({k: problem[k] for k in fields})

    problem_inserter.insertPendingRows()

    test_id_maps['problems'] = problem_id_map

    # Submissions, submission_events and assessments
    # -----------------------------------------------

    submission_index = 1
    assessment_index = 1
    tests = vars['queries'].GetTests(vars)
    oetid = moocdb_utils.GetObservedEventTypeMap(vars)['problem_submission']
    for ti, test in enumerate(tests, 1):
        vars['logger'].Log(vars, "\tSubmissions, assessments, and observed events for test {} out of {}".format(ti, len(tests)))

        # Attempts made so far per user within this test.
        user_num_submissions = {}
        submission_assessment_data = vars['queries'].GetSubmissionAndAssessmentData(vars, test)

        submissions = []
        assessments = []
        observed_events = []

        for submission in submission_assessment_data:
            user_id = vars['id_maps']['users'][submission['user_original_id']]
            problem_id = problem_id_map[submission['problem_original_id']]

            attempt_number = user_num_submissions.get(user_id, 0) + 1
            user_num_submissions[user_id] = attempt_number

            submissions.append({
                'submission_id': submission_index,
                'user_id': user_id,
                'problem_id': problem_id,
                'submission_timestamp': submission['submission_timestamp'],
                'submission_answer': submission['submission_answer'],
                'submission_attempt_number': attempt_number,
                'submission_is_submitted': 1,
            })

            for assn in submission['assessments']:
                # A grader_original_id of 0 is stored as grader id 0.
                # NOTE(review): this reads id_maps['user'] while every other
                # lookup uses id_maps['users'] -- confirm whether a separate
                # map keyed by lower-cased grader id exists or this is a typo.
                grader_id = 0 if assn['grader_original_id'] == 0 else vars['id_maps']['user'][assn['grader_original_id'].lower()]
                assessments.append({
                    'assessment_id': assessment_index,
                    'submission_id': submission_index,
                    'assessment_grade': assn['grade'],
                    'assessment_max_grade': assn['max_grade'],
                    'assessment_grader_id': grader_id,
                    'assessment_timestamp': assn['assessment_timestamp'],
                })
                assessment_index += 1

            # The event refers to the problem this submission answers.
            # (Previously it read the leftover loop variable of the problems
            # loop above, i.e. always the last problem.)
            observed_events.append({
                'observed_event_type_id': oetid,
                'user_id': user_id,
                'item_id': problem_id,
                'observed_event_timestamp': submission['submission_timestamp'],
                'observed_event_data': '{}',
            })

            submission_index += 1

        InsertSubmissions(vars, submissions)
        InsertAssessments(vars, assessments)
        InsertObservedEvents(vars, observed_events)

    return test_id_maps
예제 #6
0
def TransformCollaborationData(vars):
    """Copy forum posts, forum votes and wiki edits into ``collaborations``.

    Every source item receives a new sequential ``collaboration_id`` starting
    at 1 and is recorded in the returned id map, whether or not it is
    inserted.  An item is inserted only if its parent reference (when it has
    one), its resource reference (when it has one) and its user can all be
    resolved to target ids.  Items are modified in place.

    Args:
        vars: Pipeline context.  Reads ``vars['target']``, ``vars['queries']``,
            ``vars['logger']`` and ``vars['id_maps']`` (``'forums'``,
            ``'wikis'`` and ``'users'`` entries).

    Returns:
        dict mapping collaboration type name ->
        {original id -> new collaboration id}.
    """
    collaboration_id_maps = {}

    fields = {
        'collaboration_id': 'num',
        'collaboration_type_id': 'num',
        'user_id': 'num',
        'resource_id': 'num',
        'collaboration_content': 'string',
        'collaboration_timestamp': 'datetime',
        'collaboration_parent_id': 'num',
        'collaboration_child_number': 'num',
    }

    t = vars['target']
    collaboration_inserter = db.StaggeredInsert(t['host'], t['user'],
                                                t['password'], t['port'],
                                                t['db'], 'collaborations',
                                                fields)

    # Forum posts come first so that replies and votes can resolve the post
    # they refer to.
    collaborations = [
        {
            'type': 'forum_posts',
            'items': vars['queries'].GetForumPosts(vars),
            'parent_type': 'forum_posts',
            'resource_type': 'forums'
        },
        {
            'type': 'forum_votes',
            'items': vars['queries'].GetForumVotes(vars),
            'parent_type': 'forum_posts',
            'resource_type': 'forums'
        },
        {
            'type': 'wiki_edits',
            'items': vars['queries'].GetWikiEdits(vars),
            'parent_type': None,
            'resource_type': 'wikis'
        },
    ]

    coll_index = 1
    for coll_subset in collaborations:
        coll_type = coll_subset['type']
        parent_type = coll_subset['parent_type']
        resource_type = coll_subset['resource_type']
        type_map = collaboration_id_maps[coll_type] = {}

        for item in coll_subset['items']:
            item['collaboration_id'] = coll_index
            type_map[item['original_id']] = coll_index
            coll_index += 1

            add_item = True

            # Parent: an unresolvable parent reference drops the item.
            item['collaboration_parent_id'] = None
            cpoid = item['collaboration_parent_original_id']
            if parent_type is not None and cpoid is not None:
                parent_map = collaboration_id_maps[parent_type]
                if cpoid in parent_map:
                    item['collaboration_parent_id'] = parent_map[cpoid]
                else:
                    add_item = False

            # Resource: an unresolvable resource reference drops the item.
            item['resource_id'] = None
            roid = item['resource_original_id']
            if resource_type is not None and roid is not None:
                resource_map = vars['id_maps'][resource_type]
                if roid in resource_map:
                    item['resource_id'] = resource_map[roid]
                else:
                    add_item = False

            # User: items from unknown users are dropped.
            user_map = vars['id_maps']['users']
            uoid = item['user_original_id']
            if uoid in user_map:
                item['user_id'] = user_map[uoid]
            else:
                add_item = False

            if add_item:
                collaboration_inserter.addRow({k: item[k] for k in fields})

    # Flush the rows still held by the inserter.
    collaboration_inserter.insertPendingRows()

    vars["logger"].Log(
        vars, "Counts: Inserted {} collaborations to target".format(
            collaboration_inserter.num_inserted_rows))

    return collaboration_id_maps
예제 #7
0
def TransformUserData(vars):
    """Copy all source users into the target ``users`` table.

    Each user receives a new sequential ``user_id`` starting at 1.  The IP is
    converted with ``db.ip_aton``, a missing email becomes the empty string,
    a missing country is looked up from the IP in the core ``ip_country``
    table, and the timezone offset is derived from the country via the core
    ``timezone`` table.  User records are modified in place.

    Args:
        vars: Pipeline context.  Reads ``vars['core']`` and ``vars['target']``
            (each with keys ``host``, ``user``, ``password``, ``port``,
            ``db``), ``vars['queries']`` and ``vars['logger']``.

    Returns:
        dict mapping original user id -> new user id.

    Raises:
        KeyError: if a user record lacks ``user_ip`` or ``original_id``.
    """
    # DB connections
    # --------------
    c = vars['core']
    core_db_selector = db.Selector(c['host'], c['user'], c['password'],
                                   c['port'], c['db'])

    # Populate the users table
    user_id_map = {}
    users = vars['queries'].GetUsers(vars)

    fields = {
        'user_id': 'num',
        'user_email': 'string',
        'user_type_id': 'num',
        'user_join_timestamp': 'datetime',
        'user_ip': 'ip',
        'user_country': 'string',
        'user_timezone_offset': 'num',
        'user_final_grade': 'num',
    }

    # IP-country lookup table
    ip_country_rows = [{
        'start': int(x['ip_numeric_start']),
        'stop': int(x['ip_numeric_stop']),
        'country_code': x['country_code']
    } for x in core_db_selector.query(
        "SELECT ip_numeric_start,ip_numeric_stop,country_code FROM ip_country ORDER BY ip_numeric_start"
    )]

    t = vars['target']
    user_inserter = db.StaggeredInsert(t['host'], t['user'], t['password'],
                                       t['port'], t['db'], 'users', fields)

    # country code -> timezone offset, so the timezone table is queried once
    # per country instead of once per user.
    timezone_offset_by_country = {}

    for moocdb_user_id, user in enumerate(users, 1):
        # User MOOCdb ID
        user['user_id'] = moocdb_user_id

        # User IP
        user['user_ip'] = db.ip_aton(user['user_ip'])

        # User email cannot be null
        if user.get('user_email') is None:
            user['user_email'] = ''

        # User country
        # Note: Some platforms don't record IP, but do record country
        user.setdefault('user_country', None)
        if user['user_country'] is None and user['user_ip'] != 'null':
            for ipc_row in ip_country_rows:
                if user['user_ip'] >= ipc_row['start'] and user[
                        'user_ip'] <= ipc_row['stop']:
                    user['user_country'] = ipc_row['country_code']
                    break

        # User timezone offset
        # Derived from the country rather than taken from the platform, since
        # some platforms provide incorrect data for user timezone.  The value
        # used is the gmt_offset of the middle row returned for the country.
        # NOTE(review): the query has no ORDER BY, so this is a median only
        # if the rows happen to come back sorted -- confirm.
        country = user['user_country']
        utzo = None
        if country is not None:
            if country not in timezone_offset_by_country:
                # NOTE(review): the country code is interpolated straight
                # into the SQL text; switch to a parameterized query if
                # db.Selector supports one.
                r = core_db_selector.query(
                    "SELECT * FROM timezone WHERE country_code='{}'".format(
                        country))
                offsets = [x['gmt_offset'] for x in r]
                # Floor division keeps the index an int on Python 3 as well.
                timezone_offset_by_country[country] = (
                    offsets[len(offsets) // 2] if offsets else None)
            utzo = timezone_offset_by_country[country]
        user['user_timezone_offset'] = utzo

        # Columns the source did not provide are inserted as None.
        user_inserter.addRow({k: user.get(k) for k in fields})

        user_id_map[user['original_id']] = moocdb_user_id

    # Flush the rows still held by the inserter.
    user_inserter.insertPendingRows()

    vars["logger"].Log(
        vars, "Counts: Inserted {} users to target".format(
            user_inserter.num_inserted_rows))

    return user_id_map