def load_data():
    """Load feature matrix X and labels Y, caching intermediate stages.

    Resolution order:
      1. 'data.npz'    - fully processed features; returned as-is.
      2. 'cleared.npz' - variance-filtered features; selection still runs.
      3. 'data.json'   - raw features; cleaned, cached, then selected.

    Returns a mapping with keys 'X' (features) and 'Y' (labels).
    """
    files = os.listdir('.')  # listed once instead of per branch
    if 'data.npz' in files:
        logger.info('loading saved features')
        return np.load('data.npz')
    if 'cleared.npz' in files:
        logger.info('use cleared features')
        data = np.load('cleared.npz')
        X = data['X']
        Y = data['Y']
    else:
        logger.info('loading json')
        # with-statement guarantees the handle is released even on a
        # parse error (the original leaked it in that case)
        with open('data.json', 'r') as f:
            data = json_wrapper.loads(f.read())
        logger.info('json loaded')
        X = np.array(data['features'], np.float32)
        # drop zero-variance (constant) feature columns
        logger.info('clear features')
        X = VarianceThreshold().fit_transform(X)
        Y = np.array(data['grades'], np.float32)
        # cache the cleaned features so the json parse happens only once
        np.savez('cleared', X=X, Y=Y)
        logger.info('clear features done')
    Y = grades_to_labels(Y)
    # label convention: 1 = good, 0 = normal, -1 = poor
    logger.info('num of good students: ' + str(sum(1 for y in Y if y == 1)))
    logger.info('num of normal students: ' + str(sum(1 for y in Y if y == 0)))
    logger.info('num of poor students: ' + str(sum(1 for y in Y if y == -1)))
    logger.info('feature selection')
    X = feature_selection(X, Y)
    X, Y = filtering(X, Y)
    logger.info('writing npz data')
    result = {'X': X, 'Y': Y}
    np.savez('data', **result)
    return result
def process(file_name, conn, term):
    """Insert every problem module of a course-structure json dump into
    the all_courses_problems table.

    file_name -- path to the course structure json file
    conn      -- open DB connection (DB-API style, %s placeholders)
    term      -- "<course_id>-<term_id>" string, e.g. "COMP102x-2T2014"
    """
    # with-statement closes the file even if parsing raises
    with open(file_name) as f:
        obj = json_wrapper.loads(f.read())
    # course/term are loop-invariant - derive them once, not per record
    course_id = term.split('-')[0]
    term_id = term.split('-')[1]
    for name in obj:
        entry = obj[name]
        if entry['category'] != "problem" or 'display_name' not in entry['metadata']:
            continue
        try:
            if '+' in name:
                # new-style key: ...+type@problem+block@<xml_id>
                xml_id = name.split('+')[-1].split('@')[-1]
            else:
                # old-style key: i4x://ORG/COURSE/problem/<xml_id>
                xml_id = name.split('/')[-1]
            display_name = entry['metadata']['display_name']
            # the first two words of the display name encode the type
            problem_type = ' '.join(display_name.split(' ')[0:2])
            c = conn.cursor()
            c.execute(
                "INSERT INTO all_courses_problems VALUES(%s, %s, %s, %s, %s);",
                [course_id, term_id, xml_id, display_name, problem_type])
            conn.commit()
        except Exception:
            # surface the offending record before re-raising
            print('name = ' + name)
            raise
def register_task():
    """Register an aggregation task described by the 'desc' request value.

    request: {
        "desc": {
            "input_data_type_id": 121,
            "time-range": "1m",
            "functions": {
                "1": {"name": "filter", "target": "status",
                      "conditions": [{"target": "cost", "operator": "bt",
                                      "param1": 100, "param2": 1000}, {}]},
                "2": {"name": "average", "target": "cost", "tag": "avg_cost",
                      "group_by": ["function", "status"]}
            }
        }
    }

    Returns a json response: {'result': 'successful', 'task_id': ...} on
    success, {'result': 'fail', 'info': ...} otherwise.
    """
    print(request.values.get('desc'))
    try:
        task_info = json_wrapper.loads(request.values.get('desc'))
    except Exception:
        # narrowed from a bare except, which would also trap
        # SystemExit/KeyboardInterrupt
        return jsonify({'result': 'fail', 'info': 'broken json'})
    print(str(task_info))
    reg_result = __task_center.register_task(task_info)
    if reg_result['succ']:
        response = jsonify({'result': 'successful',
                            'task_id': reg_result['task_id']})
    else:
        response = jsonify({'result': 'fail', 'info': reg_result['info']})
    return response
def register_data_type():
    """Register a data type described by the 'desc' request value.

    request: {"desc": {"title": "Title",
                       "attrs": {"userid": "string", "function": "string",
                                 "status": "integer", "cost": "real"}}}

    Returns a json response with the new data_type_id on success, or a
    failure reason otherwise.
    """
    try:
        data_type = json_wrapper.loads(request.values.get('desc'))
    except Exception:
        # narrowed from a bare except
        return jsonify({'result': 'fail', 'info': 'broken json'})
    result = __data_center.register_data_type(data_type)
    if result['succ']:
        response = jsonify({'result': 'successful',
                            'data_type_id': result['id']})
    elif result['info'] == 'type_error':
        response = jsonify({'result': 'fail',
                            'info': 'type not accept: ' + str(result['attr'])})
    elif result['info'] == 'db_error':
        # BUG FIX: this was a separate `if` whose `else` clobbered the
        # 'type_error' response built just above with the generic one.
        response = jsonify({'result': 'fail', 'info': 'database error'})
    else:
        response = jsonify({'result': 'fail', 'info': result['info']})
    return response
def register_task():
    """Register an aggregation task described by the 'desc' request value.

    request: {
        "desc": {
            "input_data_type_id": 121,
            "time-range": "1m",
            "functions": {
                "1": {"name": "filter", "target": "status",
                      "conditions": [{"target": "cost", "operator": "bt",
                                      "param1": 100, "param2": 1000}, {}]},
                "2": {"name": "average", "target": "cost", "tag": "avg_cost",
                      "group_by": ["function", "status"]}
            }
        }
    }

    Returns a json response: {'result': 'successful', 'task_id': ...} on
    success, {'result': 'fail', 'info': ...} otherwise.
    """
    print(request.values.get('desc'))
    try:
        task_info = json_wrapper.loads(request.values.get('desc'))
    except Exception:
        # narrowed from a bare except, which would also trap
        # SystemExit/KeyboardInterrupt
        return jsonify({'result': 'fail', 'info': 'broken json'})
    print(str(task_info))
    reg_result = __task_center.register_task(task_info)
    if reg_result['succ']:
        response = jsonify({'result': 'successful',
                            'task_id': reg_result['task_id']})
    else:
        response = jsonify({'result': 'fail', 'info': reg_result['info']})
    return response
def query_data(data_type_id, start_time, end_time):
    """Return rows of *data_type_id* with start_time <= time <= end_time.

    Each result item is {'id': ..., 'attrs': <json-decoded dict>,
    'time': ...}.
    """
    session = Session()
    try:
        rows = session.query(DataInfo.id, DataInfo.attrs, DataInfo.time)\
            .filter(DataInfo.time >= start_time,
                    DataInfo.time <= end_time,
                    DataInfo.data_type_id == data_type_id)
        return [{'id': rid, 'attrs': json_wrapper.loads(attrs), 'time': t}
                for rid, attrs, t in rows]
    finally:
        # previously skipped when the query or json decoding raised
        session.close()
def get_attempts(json):
    """Return the 'attempts' field of a json state string, or 0 if absent.

    json -- raw state string; doubled backslashes are collapsed first
            because the DB dump escapes them.
    """
    json = json.replace('\\\\', '\\')
    try:
        temp = json_wrapper.loads(json)
    except Exception:
        # dump the offending payload before re-raising (was the Py2-only
        # `print json` statement)
        print(json)
        raise
    # dict.get replaces the membership-test-then-index pair
    return temp.get('attempts', 0)
def query_task_by_id(task_id):
    """Fetch one task row by id and decode its json/text columns.

    Returns a dict with keys id, input_data_type_id, time_range,
    functions, result_data_type_id, title - or None when no task
    matches.
    """
    session = Session()
    result = None  # BUG FIX: was unbound (NameError) when no row matched
    try:
        rows = session.query(TaskInfo.id, TaskInfo.input_data_type_id,
                             TaskInfo.time_range, TaskInfo.functions,
                             TaskInfo.result_data_type_id,
                             TaskInfo.title).filter(TaskInfo.id == task_id)
        for info in rows:
            result = {'id': int(info[0]),
                      'input_data_type_id': int(info[1]),
                      'time_range': json_wrapper.encode(info[2], 'utf-8'),
                      'functions': json_wrapper.loads(info[3]),
                      'result_data_type_id': int(info[4]),
                      'title': json_wrapper.encode(info[5], 'utf-8')}
    finally:
        # release the session even when decoding raises
        session.close()
    return result
def query_data(data_type_id, start_time, end_time):
    """Return rows of *data_type_id* whose time lies in
    [start_time, end_time].

    Each element is {'id': ..., 'attrs': <json-decoded dict>, 'time': ...}.
    """
    session = Session()
    result = []
    try:
        rows = session.query(DataInfo.id, DataInfo.attrs, DataInfo.time)\
            .filter(DataInfo.time >= start_time,
                    DataInfo.time <= end_time,
                    DataInfo.data_type_id == data_type_id)
        for info in rows:
            result.append({
                'id': info[0],
                'attrs': json_wrapper.loads(info[1]),
                'time': info[2],
            })
    finally:
        # previously leaked the session when iteration/decoding raised
        session.close()
    return result
def process(file_name, term):
    """Load a clickstream event log (one json object per line) into the
    <term>_clickstream_events table.

    file_name -- path to the log file
    term      -- course run id; '.' and '-' map to '_' so the value is a
                 legal SQL table-name prefix
    """
    term = term.replace('.', '_').replace('-', '_')
    table = term + '_clickstream_events'
    create_table(table)
    # with + line iteration replaces the manual readline loop and
    # guarantees the handle is closed on error
    with open(file_name) as f:
        for line in f:
            obj = json_wrapper.loads(line)
            ctx = obj['context']
            text = [
                ctx.get('user_id'),        # optional fields default to None
                obj['username'],
                obj.get('session'),
                obj['event_type'],
                obj.get('name'),
                obj['event_source'],
                obj['time'],
                obj.get('referer'),
            ]
            insert_table(text, table)
def process(file_name, conn, term, module_id, split_type, split_id):
    """Flatten a course-structure json into the <term>_element table.

    Walks chapter -> sequential -> vertical -> element (and one more
    level for container/library elements), inserting rows of
    (chapter_num, chapter_name, element_type, element_id).  Discussion
    blocks are keyed by their forum discussion_id instead of block id.

    module_id  -- iterable of '<name>@<chapter_num>' chapter identifiers
    split_type -- callable extracting the block type from a structure key
    split_id   -- callable extracting the block id from a structure key
    """
    term = term.replace('.', '_').replace('-', '_')
    create_table(conn, term + '_element')
    # with-statement closes the file even if parsing raises
    with open(file_name) as f:
        obj = json_wrapper.loads(f.read())
    # map discussion block id -> forum discussion_id
    discussion_dict = {}
    for key in obj:
        if split_type(key) == 'discussion' and 'discussion_id' in obj[key]['metadata']:
            discussion_dict[split_id(key)] = obj[key]['metadata']['discussion_id']
    # map 'type@id' -> list of child 'type@id' entries
    element = {}
    for key in obj:
        if split_type(key) == 'course':
            continue
        child = []
        for children in obj[key]['children']:
            if split_type(children) == 'discussion':
                child.append(split_type(children) + '@' + discussion_dict[split_id(children)])
            else:
                child.append(split_type(children) + '@' + split_id(children))
        element[split_type(key) + '@' + split_id(key)] = child
    for mid in module_id:
        name = mid.split('@')[0]
        num = mid.split('@')[-1]
        for sqt in element['chapter@' + num]:
            for vert in element[sqt]:
                for relment in element[vert]:
                    if len(element[relment]) > 0:
                        # container element: record its children instead
                        for lproblem in element[relment]:
                            eid = lproblem.split('@')[-1]
                            etype = lproblem.split('@')[0]
                            print(etype + ' ' + eid)
                            insert_table(conn, [num, name, etype, eid],
                                         term + '_element')
                    else:
                        eid = relment.split('@')[-1]
                        etype = relment.split('@')[0]
                        print(etype + '-------' + eid)
                        insert_table(conn, [num, name, etype, eid],
                                     term + '_element')
def insert_comment_info_table(conn, file):
    """Insert every course comment from a Coursera json dump into the
    coursera_comment_info table.

    file -- path to the dump; a path containing 'completed' marks rows
            as coming from learners who completed the course
    """
    # with-statement closes the handle the original leaked
    with open(file) as f:
        obj = json_wrapper.loads(f.read())
    # loop-invariant: depends only on the file name, so compute it once
    completed = 1 if 'completed' in file else 0
    c = conn.cursor()  # one cursor instead of one per row
    for element in obj['elements']:
        course_id = element['context']['definition']['courseId']
        user_id = element['userId']
        comment = clean_comment(element['comments']['generic']['definition']['value'])
        rating = element['rating']['value']
        timestamp = element['timestamp']
        c.execute(
            "INSERT INTO coursera_comment_info VALUES(%s, %s, %s, %s, %s, %s);",
            [course_id, user_id, comment, rating, timestamp, completed])
        conn.commit()
def __init__(self, path=None):
    """Create an empty network, or restore one from *path*.

    path -- optional file whose first line is a json structure dump
            containing learn_rate, default_weight, input/output nodes,
            relations, weights and thetas.
    """
    if path is None:
        # fresh, untrained network with default hyper-parameters
        self.__learn_rate = 0.01
        self.__default_weight = 0.5
        self.__input_nodes = []
        self.__relations = {}
        self.__output_nodes = []
        self.__weights = {}
        self.__thetas = {}
    else:
        # `with` closes the handle even if decoding raises; also stops
        # shadowing the builtin name `file`
        with open(path, 'r') as fin:
            structure = json_wrapper.loads(fin.readline(), 'utf-8')
        self.__learn_rate = structure['learn_rate']
        self.__default_weight = structure['default_weight']
        self.__input_nodes = structure['input_nodes']
        self.__output_nodes = structure['output_nodes']
        self.__relations = structure['relations']
        self.__weights = structure['weights']
        self.__thetas = structure['thetas']
def put_data():
    """Store one data record supplied in the 'data' request value.

    request: {"data": {"data_type_id": 5243212,
                       "attrs": {"userid": 123,
                                 "function": "GET/userinfo",
                                 "status": 200, "cost": 12.59},
                       "time": "2015-09-01 10:50:39"}}

    Returns a json response describing success or the failure reason.
    """
    try:
        data = json_wrapper.loads(request.values.get('data'))
    except Exception:
        # narrowed from a bare except, which would also trap
        # SystemExit/KeyboardInterrupt
        return jsonify({'result': 'fail', 'info': 'broken json'})
    result = __data_center.put_data(data)
    if result['succ']:
        return jsonify({'result': 'successful'})
    return jsonify({'result': 'fail', 'info': result['info']})
def process(file_name, conn, term):
    """Load a forum mongo dump (one json document per line) into the
    <term>_commentthread and <term>_comment tables.

    file_name -- path to the dump file
    conn      -- open DB connection
    term      -- course run id; '.' and '-' map to '_' for table names
    """
    term = term.replace('.', '_').replace('-', '_')
    commentthread.create_table(conn, term + '_commentthread')
    comment.create_table(conn, term + '_comment')
    # with + line iteration replaces the manual readline loop and
    # guarantees the handle is closed on error
    with open(file_name) as f:
        for line in f:
            obj = json_wrapper.loads(line)
            if obj['_type'] == "CommentThread":
                commentthread_text = [
                    obj['_id']['$oid'],
                    obj['votes']['up_count'],
                    obj['votes']['down_count'],
                    obj['votes']['count'],
                    obj['votes']['point'],
                    obj['thread_type'],
                    obj['comment_count'],
                    obj['title'],
                    obj['body'],
                    obj['updated_at']['$date'],
                    obj['created_at']['$date'],
                    obj['last_activity_at']['$date'],
                    # keep only the trailing segment of the commentable id
                    obj['commentable_id'].split('-')[-1],
                    obj['author_id'],
                    obj['author_username'],
                ]
                commentthread.insert_table(conn, commentthread_text,
                                           term + '_commentthread')
            if obj['_type'] == "Comment":
                comment_text = [
                    obj['comment_thread_id']['$oid'],
                    obj['votes']['up_count'],
                    obj['votes']['down_count'],
                    obj['votes']['count'],
                    obj['votes']['point'],
                    obj['body'],
                    obj['updated_at']['$date'],
                    obj['created_at']['$date'],
                    obj['author_id'],
                    obj['author_username'],
                ]
                comment.insert_table(conn, comment_text, term + '_comment')
def insert_course_info_table(conn, file, table):
    """Insert scraped course records from a json file into *table*.

    Missing columns become NULL; 'rating' keeps only the leading number
    of strings like "4.5 stars".
    NOTE(review): *table* is interpolated into the SQL string - callers
    must only pass trusted table names.
    """
    c = conn.cursor()
    # with-statement closes the handle the original leaked
    with open(file) as f:
        content = json_wrapper.loads(f.read())
    for item in content:
        p = []
        for column in [
                'course_id', 'course_name', 'instructor', 'rating', 'level'
        ]:
            if column not in item:
                p.append(None)
            elif column == 'rating':
                p.append(float(item[column].split(' ')[0]))
            else:
                p.append(item[column])
        try:
            c.execute("INSERT INTO " + table + " VALUES(%s, %s, %s, %s, %s);",
                      p)
        except Exception:
            # narrowed from a bare except; show the offending row first
            print(p)
            raise
    conn.commit()
def register_data_type():
    """Register a data type described by the 'desc' request value.

    request: {"desc": {"title": "Title",
                       "attrs": {"userid": "string", "function": "string",
                                 "status": "integer", "cost": "real"}}}

    Returns a json response with the new data_type_id on success, or a
    failure reason otherwise.
    """
    try:
        data_type = json_wrapper.loads(request.values.get('desc'))
    except Exception:
        # narrowed from a bare except
        return jsonify({'result': 'fail', 'info': 'broken json'})
    result = __data_center.register_data_type(data_type)
    if result['succ']:
        response = jsonify({'result': 'successful',
                            'data_type_id': result['id']})
    elif result['info'] == 'type_error':
        response = jsonify({'result': 'fail',
                            'info': 'type not accept: ' + str(result['attr'])})
    elif result['info'] == 'db_error':
        # BUG FIX: this was a separate `if` whose `else` clobbered the
        # 'type_error' response built just above with the generic one.
        response = jsonify({'result': 'fail', 'info': 'database error'})
    else:
        response = jsonify({'result': 'fail', 'info': result['info']})
    return response
"SELECT module_id, student_id, grade, max_grade, course_id, state, created, modified, id FROM " + term + "_courseware_studentmodule" + " WHERE module_type = \"problem\" or module_type = \"openassessment\" and grade is not NULL;" ) result = cursor.fetchall() create_grades_table(conn, term + "_students_grades") for row in result: student_id = row[1] grade = row[2] max_grade = row[3] created = row[6] modified = row[7] state = row[5] state = state.replace('\\\\', '\\') try: obj = json_wrapper.loads(state) except Exception as e: print state print row[8] print term raise e if 'attempts' in obj: attempt = obj['attempts'] else: attempt = None if '+' in row[4]: course_id = row[4].split('+')[1] term_id = row[4].split('+')[-1] else: course_id = row[4].split('/')[1] term_id = row[4].split('/')[-1]
# Load per-term HKUSTx clickstream logs into per-term MySQL tables.
# NOTE(review): script fragment - `terms`, `dir`, and the helpers
# (create_table, truncate, read_file, insert_table, json_wrapper) are
# defined elsewhere; indentation reconstructed from a collapsed source.
conn = MySQLdb.connect(host="localhost", user="******", passwd="Mdb4Learn",
                       db="clickstream")
for term in terms:
    # '-' and '.' are not legal in table names, so map them to '_'
    table_name = ('HKUSTx-' + term + '-clickstream').replace('-', '_').replace(
        '.', '_')
    file_name = dir + 'HKUSTx-' + term + '-clickstream.log'
    create_table(conn, table_name)
    for row in read_file.read(file_name):
        row['time'] = truncate(row['time'])
        if row['event_type'] == 'load_video':
            event = row['event']
            # the event payload is sometimes a json string, sometimes an
            # already-decoded dict - normalize before use
            if isinstance(event, str):
                event = json_wrapper.loads(event)
            if 'currentTime' not in event:
                event['currentTime'] = None
            try:
                insert_table(conn, [
                    'user_name', 'user_id', 'event_source', 'event_type',
                    'event_time', 'course_id', '`session`', 'video_id'
                ], [
                    row['username'], row['context']['user_id'],
                    row['event_source'], row['event_type'], row['time'],
                    row['context']['course_id'], row['session'], event['id']
                ], table_name)
            except KeyError:
                # some rows lack an expected key; report and move on
                print("exception found in term " + term)
                print(row['time'])
# Extract survey answers from courseware_studentmodule state blobs into
# Java_survey_answers.
# NOTE(review): script fragment - `cursor`, `conn`, and `table_prefix`
# come from earlier in the script; indentation reconstructed from a
# collapsed source.
cursor.execute("SELECT xml_id, survey_type FROM Java_survey_questions;")
result = cursor.fetchall()
# map survey question xml_id -> survey type
survey_map = {}
for row in result:
    survey_map[row[0]] = row[1]
# cursor = conn.cursor()
for table in table_prefix:
    cursor.execute("SELECT module_type, module_id, student_id, state FROM " +
                   table + "_courseware_studentmodule;")
    result = cursor.fetchall()
    term_id = table
    for row in result:
        if row[0] == 'problem':
            xml_id = row[1].split('@')[-1]
            student_id = int(row[2])
            if xml_id in survey_map:
                survey_type = survey_map[xml_id]
                try:
                    # the state column double-escapes backslashes
                    state = json_wrapper.loads(row[3].replace('\\\\', '\\'))
                except Exception:
                    print(row[3])
                    raise
                if 'student_answers' in state:
                    for key in state['student_answers']:
                        # keys appear to be "<prefix>_<question>_..." and
                        # answers "choice_<n>" - TODO confirm format
                        question_id = int(key.split('_')[1]) - 1
                        if question_id < 15:
                            answer = int(state['student_answers'][key].split('_')[1])
                            cursor.execute("INSERT INTO Java_survey_answers VALUES(%s, %s, %s, %s, %s, %s)",
                                           [student_id, xml_id, survey_type,
                                            question_id, answer, term_id])
conn.commit()
conn.close()
def read(file_name):
    """Lazily yield one json-decoded object per line of *file_name*."""
    with open(file_name) as src:
        for decoded in map(json_wrapper.loads, src):
            yield decoded
conn = MySQLdb.connect(host="localhost", user="******", passwd="Mdb4Learn", db="eLearning") for term in terms: term_id = term.split('-')[1] course_id = term.split('-')[0] table_name = ('HKUSTx-' + term + '-problem_set').replace('.', '_').replace( '-', '_') create_table(conn, table_name) cursor = conn.cursor() data = None problem_sets = {} with open(dir + 'HKUSTx-' + term + '-course_structure-prod-analytics.json', 'r') as infile: data = json_wrapper.loads(infile.read()) for key in data: if 'graded' in data[key]['metadata'] and data[key]['metadata'][ 'graded']: # xml_id = key.split('@')[-1] set_category = data[key]['metadata']['format'] set_name = data[key]['metadata']['display_name'] queue = Queue.Queue() for child in data[key]['children']: queue.put_nowait(child) while not queue.empty(): id = queue.get_nowait() category = data[id]['category'] if category in ['problem', 'openassessment']: if '@' in id: xml_id = id.split('@')[-1]
f = open( "/Users/maomaoyu/Downloads/e-learning/HKUSTx/hkustx-2016-09-11/HKUSTx-COMP102.1x-2T2015-prod.mongo" ) of1 = open( '/Users/maomaoyu/Downloads/e-learning/HKUSTx/hkustx-2016-09-11/commentthread.txt', 'w') of2 = open( '/Users/maomaoyu/Downloads/e-learning/HKUSTx/hkustx-2016-09-11/comment.txt', 'w') line = f.readline() # types = [] comment_text = [] commentthread_text = [] while line: obj = json_wrapper.loads(line) # print(obj['_type']) # threadtype if obj['_type'] == "CommentThread": commentthread_text = [ obj['_id']['$oid'], obj['votes']['up_count'], obj['votes']['down_count'], obj['votes']['count'], obj['votes']['point'], obj['thread_type'], obj['comment_count'], obj['title'], obj['body'], obj['updated_at']['$date'], obj['created_at']['$date'], obj['last_activity_at']['$date'] ] # for i in range(0, len(commentthread_text)): # commentthread_text[i] = str(commentthread_text[i]) commentthread_text = [str(x) for x in commentthread_text] output = '\t'.join(commentthread_text) output = output.replace('\n', '\\n')
# print(row) # continue if row['key'] == 'pageview': try: coursera.insert_table(conn, [ 'user_name', 'page_url', '`timestamp`', '`key`', '`session`' ], [ row['username'], row['page_url'], row['timestamp'], row['key'], row['session'] ], table_name) except Exception: print(row['timestamp']) raise elif row['key'] == 'user.video.lecture.action': value = json_wrapper.loads(row['value']) try: coursera.insert_table(conn, [ 'user_name', 'page_url', '`timestamp`', '`key`', '`session`', 'action_type', 'prev_time', 'cur_time', 'playback_rate' ], [ row['username'], row['page_url'], row['timestamp'], row['key'], row['session'], value['type'], value['prevTime'], value['currentTime'], value['playbackRate'] ], table_name) except Exception: print(row['timestamp']) raise
def query_data_type():
    """Return every registered data type.

    Each element is {'id': ..., 'title': <utf-8 encoded>,
    'attrs': <json-decoded dict>}.
    """
    session = Session()
    try:
        rows = session.query(DataTypeInfo.id, DataTypeInfo.title,
                             DataTypeInfo.attrs)
        return [{'id': tid,
                 'title': json_wrapper.encode(title, 'utf-8'),
                 'attrs': json_wrapper.loads(attrs)}
                for tid, title, attrs in rows]
    finally:
        # previously skipped when the query or decoding raised
        session.close()
def to_dict(self):
    """Serialize this record: id as-is, title utf-8 encoded, attrs
    decoded from their stored json string."""
    return {
        'id': self.id,
        'title': json_wrapper.encode(self.title, 'utf-8'),
        'attrs': json_wrapper.loads(self.attrs),
    }