def process_forum_entries():
    """Summarise forum posting activity per course and per user.

    Input file: "forum_entries_courses.csv"
    Header: service,course_id,kurs,semester,description,user,name,nid,id,
            parent_id,date,subject_length,text_length

    Returns a dict with:
        (kurs, semester) -> "username" ->
            (written count [int], avg subject length [float],
             median text length [int])

    Users whose name is rejected by is_valid_matrikel_nummer() are omitted.
    """
    # Zero-based column positions inside one row of the input file.
    COL_KURS = 2
    COL_SEMESTER = 3
    COL_USER = 5
    COL_SUBJECT_LEN = 11
    COL_TEXT_LEN = 12

    def summarise_posts(posts):
        # One tuple per user: post count, mean subject length, median text
        # length.  The length columns are parsed as integers.
        subject_lengths = [int(post[COL_SUBJECT_LEN]) for post in posts]
        text_lengths = [int(post[COL_TEXT_LEN]) for post in posts]
        return (len(posts), average(subject_lengths), median(text_lengths))

    def summarise_users(posts_by_user):
        # Callback handed to groupby(); receives a mapping of
        # username -> rows and keeps only valid usernames.
        return {
            user: summarise_posts(posts_by_user[user])
            for user in posts_by_user
            if is_valid_matrikel_nummer(user)
        }

    rows = load_lines("forum_entries_courses.csv")
    rows_by_course = groupby(rows, [COL_KURS, COL_SEMESTER])
    return groupby(rows_by_course, [COL_USER], summarise_users)
def process_entries_read(kurs_mapping):
    """Count how many forum entries each user has read, per course.

    Args:
        kurs_mapping: dict with forum course_id -> (kurs, semester).

    Input file: "forum_readlist.csv"
    Header: service,course_id,username,nid,id

    Returns a dict with:
        (kurs, semester) -> "username" -> read count [int]

    Every course_id found in the file is looked up in kurs_mapping, so a
    course_id missing from a plain dict mapping raises KeyError.  Users
    whose name is rejected by is_valid_matrikel_nummer() are omitted.
    """
    # Zero-based column positions inside one row of the input file.
    COL_COURSE_ID = 1
    COL_USERNAME = 2

    def count_reads(rows_by_user):
        # Callback handed to groupby(): username -> number of rows.
        counts = {}
        for user in rows_by_user:
            if is_valid_matrikel_nummer(user):
                counts[user] = len(rows_by_user[user])
        return counts

    rows = load_lines("forum_readlist.csv")
    rows_by_course = groupby(rows, [COL_COURSE_ID])
    reads_by_course = groupby(rows_by_course, [COL_USERNAME], count_reads)

    # Re-key the per-course results from course_id to (kurs, semester).
    return {
        kurs_mapping[course_id]: reads_by_course[course_id]
        for course_id in rows_by_course
    }
def process_median_feedback_length():
    """Compute the median feedback comment length per course and user.

    Input file: "abgabe_feedback_courses.csv"
    Header: service,course_id,kurs,semester,description,user_id,task,
            subtask,author,comment_length

    Returns a dict with:
        (kurs, semester) -> "username" -> median feedback [float]

    Users whose name is rejected by is_valid_matrikel_nummer() are omitted.
    """
    # Zero-based column positions inside one row of the input file.
    COL_KURS = 2
    COL_SEMESTER = 3
    COL_USER = 5
    COL_COMMENT_LEN = 9

    def median_comment_length(rows):
        # Comment lengths are parsed as integers before taking the median.
        return median([int(row[COL_COMMENT_LEN]) for row in rows])

    def medians_by_user(rows_by_user):
        # Callback handed to groupby(): username -> median comment length.
        return {
            user: median_comment_length(rows_by_user[user])
            for user in rows_by_user
            if is_valid_matrikel_nummer(user)
        }

    rows = load_lines("abgabe_feedback_courses.csv")
    rows_by_course = groupby(rows, [COL_KURS, COL_SEMESTER])
    return groupby(rows_by_course, [COL_USER], medians_by_user)
def get_pid(self):
    """Read the process id stored in the pid file at self.file_name.

    Only the first line of the file is requested from load_lines().

    Returns:
        The pid as an int, or None when the stripped first line is empty.
        The empty case is meant to cover a pid file that does not exist;
        that relies on how load_lines() handles a missing file.
    """
    first_line = load_lines(self.file_name, line_count=1)
    pid_text = first_line.strip()
    return int(pid_text) if pid_text else None
def process_kurs_mapping():
    """Build the lookup from forum course_id to its (kurs, semester) pair.

    Input file: "forum_entries_courses.csv"
    Header: service,course_id,kurs,semester,description,user,name,nid,id,
            parent_id,date,subject_length,text_length

    Returns a dict with:
        (course_id) -> (kurs, semester)

    The course_id is taken from the first row of each (kurs, semester)
    group only.
    """
    # Zero-based column positions inside one row of the input file.
    COL_COURSE_ID = 1
    COL_KURS = 2
    COL_SEMESTER = 3

    rows = load_lines("forum_entries_courses.csv")
    rows_by_course = groupby(rows, [COL_KURS, COL_SEMESTER])

    mapping = {}
    for course_key in rows_by_course:
        first_row = rows_by_course[course_key][0]
        mapping[first_row[COL_COURSE_ID]] = course_key
    return mapping
def is_stopped(self):
    """Poll the command file and report whether a stop was requested.

    The command file is named "<script_name()>.command"; its first line,
    stripped and lower-cased, is the command.  When a command is present
    it is logged and the command file is deleted before it is acted on.

    Returns:
        True for 'die', 'exit', 'kill', 'quit' or 'stop'; False otherwise.
        'diagnostics', 'dump' and 'info' call self.dump() and return False.
    """
    command_file_name = f'{script_name()}.command'
    command = load_lines(command_file_name, line_count=1).strip().lower()

    # Nothing queued: keep running.
    if not command:
        return False

    logger.info(f'Command: {command}')
    delete_file(command_file_name)

    should_stop = command in ('die', 'exit', 'kill', 'quit', 'stop')
    if command in ('diagnostics', 'dump', 'info'):
        self.dump()
    return should_stop
def process_avg_score():
    """Compute per-user assessment score statistics for every course.

    Input file: "abgabe_assessment_results_courses.csv"
    Header: service,course_id,kurs,semester,description,user_id,result_id,
            result_value

    Returns a dict with:
        (kurs, semester) -> "username" ->
            (quality score, avg score [float], number of scores [int])

    One score is taken per result_id (the first row of each result_id
    group).  The quality score is the sum of those scores divided by the
    highest of them, or 0 when the highest score is 0.  Users whose name is
    rejected by is_valid_matrikel_nummer() are omitted.
    """
    # Zero-based column positions inside one row of the input file.
    COL_KURS = 2
    COL_SEMESTER = 3
    COL_USER = 5
    COL_RESULT_ID = 6
    COL_RESULT_SCORE = 7

    def score_user(rows):
        rows_by_result = groupby(rows, [COL_RESULT_ID])
        # Only the first row of each result_id group contributes a score.
        scores = [float(group[0][COL_RESULT_SCORE])
                  for group in rows_by_result.values()]
        mean_score = average(scores)
        top_score = max(scores)
        # scores already holds floats, so this is a float division; the
        # guard avoids dividing by a zero maximum.
        quality = sum(scores) / top_score if top_score != 0 else 0
        return (quality, mean_score, len(scores))

    def score_users(rows_by_user):
        # Callback handed to groupby(): username -> score statistics.
        stats = {}
        for user in rows_by_user:
            if is_valid_matrikel_nummer(user):
                stats[user] = score_user(rows_by_user[user])
        return stats

    rows = load_lines("abgabe_assessment_results_courses.csv")
    rows_by_course = groupby(rows, [COL_KURS, COL_SEMESTER])
    return groupby(rows_by_course, [COL_USER], score_users)
def process_plus_count():
    """Count the pluses each user received, per course.

    Input file: "abgabe_assessment_pluses_courses.csv"
    Header: service,course_id,kurs,semester,description,user_id,plus_date

    Returns a dict with:
        (kurs, semester) -> "username" -> plus count [int]

    Users whose name is rejected by is_valid_matrikel_nummer() are omitted.
    """
    # Zero-based column positions inside one row of the input file.
    COL_KURS = 2
    COL_SEMESTER = 3
    COL_USER = 5

    def pluses_per_user(rows_by_user):
        # Callback handed to groupby(): username -> number of plus rows.
        return {
            user: len(rows_by_user[user])
            for user in rows_by_user
            if is_valid_matrikel_nummer(user)
        }

    rows = load_lines("abgabe_assessment_pluses_courses.csv")
    rows_by_course = groupby(rows, [COL_KURS, COL_SEMESTER])
    return groupby(rows_by_course, [COL_USER], pluses_per_user)