def parse_tsv_files(input_path, output_dir):
    """Parse .tsv course files under input_path and write one JSON file
    per course into output_dir.

    input_path may be a single .tsv file or a directory that is walked
    recursively. Exits the process on an invalid input path or when
    output_dir exists but is a file.
    """
    if not os.path.exists(input_path):
        sys.exit("Error: invalid input path '{}'".format(input_path))
    if os.path.isfile(input_path):
        tsv_files = [input_path]
    else:
        tsv_files = [
            path_join(root, name)
            for root, _dirs, names in os.walk(input_path)
            for name in names
            if name.endswith(".tsv")
        ]
    # The output location must be a directory; create it when missing.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    elif os.path.isfile(output_dir):
        sys.exit("Error: Out arg must be directory.")
    for source_file in tsv_files:
        # Course name = path with extension, input prefix and slashes stripped.
        stem = source_file.replace(".tsv", "")
        stem = stem.replace(input_path, "")
        stem = stem.replace("/", "")
        parsed = parse_course_tsv(source_file)
        dump_json(parsed, path_join(output_dir, stem) + ".json")
def get_args():
    """Parse command-line arguments for the responses script.

    Returns the argparse namespace with .input and .output resolved.
    With -s all (the default) this function instead re-invokes
    fui_kk/responses.py once per semester folder under data/ and then
    exits the process without returning.
    """
    argparser = argparse.ArgumentParser(
        description="Parse tsv file(s) and output json course data")
    argparser.add_argument("--output", "-o", help="Output dir", type=str)
    argparser.add_argument("--input", "-i", help="Input dir/file", type=str)
    argparser.add_argument("--semester", "-s", default="all",
                           help="Semester", type=str)
    args = argparser.parse_args()
    if args.semester:
        if args.semester == "all":
            # Fan out: run this script once for each top-level dir in data/.
            dirs = next(os.walk('data'))[1]
            for d in dirs:
                # Guard against infinite recursion if a 'all' folder exists.
                if d.replace("/", "") == "all":
                    sys.exit("Error: Recursion check failed - 'all' folder!")
                if d[0] != ".":  # skip hidden directories
                    os.system("python3 fui_kk/responses.py -s " + d)
            sys.exit()
        else:
            # Derive conventional in/out paths from the semester name.
            args.input = path_join("data", args.semester, "downloads/tsv")
            args.output = path_join("data", args.semester, "outputs/responses")
    # Reachable only when -s is explicitly set to an empty string.
    if not args.input or not args.output:
        sys.exit(
            "Error: Specify input and output using -i and -o parameters, or semester using -s parameter"
        )
    return args
def parse_tsv_files(input_path, output_dir):
    """Parse .tsv course files under input_path and write one JSON file
    per course into output_dir.

    input_path may be a single .tsv file or a directory (walked
    recursively). Exits on an invalid input path or when output_dir
    exists but is a file.
    """
    if not os.path.exists(input_path):
        sys.exit("Error: invalid input path '{}'".format(input_path))
    input_files = []
    if os.path.isfile(input_path):
        input_files.append(input_path)
    else:
        for (root, dirs, files) in os.walk(input_path):
            for file_x in files:
                if file_x.endswith(".tsv"):
                    input_files.append(path_join(root, file_x))
    if os.path.exists(output_dir):
        if os.path.isfile(output_dir):
            sys.exit("Error: Out arg must be directory.")
    else:
        os.makedirs(output_dir)
    for tsv_filename in input_files:
        # Course name = path with extension, input prefix and slashes removed.
        coursename = tsv_filename.replace(".tsv", "")
        coursename = coursename.replace(input_path, "")
        coursename = coursename.replace("/", "")
        content = parse_course_tsv(tsv_filename)
        dump_json(content, path_join(output_dir, coursename) + ".json")
def get_args():
    """Parse command-line arguments for the responses script.

    Returns the argparse namespace with .input/.output resolved from the
    semester. With -s all (the default) the script is re-run once per
    semester folder under data/ and the process exits.
    """
    parser = argparse.ArgumentParser(
        description="Parse tsv file(s) and output json course data")
    parser.add_argument("--output", "-o", help="Output dir", type=str)
    parser.add_argument("--input", "-i", help="Input dir/file", type=str)
    parser.add_argument("--semester", "-s", default="all",
                        help="Semester", type=str)
    args = parser.parse_args()
    if args.semester == "all":
        # Fan out over every top-level directory in data/.
        semester_dirs = next(os.walk('data'))[1]
        for semester_dir in semester_dirs:
            # Refuse to recurse into a literal 'all' folder.
            if semester_dir.replace("/", "") == "all":
                sys.exit("Error: Recursion check failed - 'all' folder!")
            if not semester_dir.startswith("."):
                os.system("python3 fui_kk/responses.py -s " + semester_dir)
        sys.exit()
    elif args.semester:
        args.input = path_join("data", args.semester, "downloads/tsv")
        args.output = path_join("data", args.semester, "outputs/responses")
    if not args.input or not args.output:
        sys.exit("Error: Specify input and output using -i and -o parameters, or semester using -s parameter")
    return args
def web_reports_semester_folder(semester_path):
    """Generate the uploadable web report for one semester folder.

    Renders a per-course HTML page for every course in
    outputs/courses.json and writes Norwegian and English index pages
    into outputs/web/upload/<semester>. Exits with status 1 when the
    semester name does not start with H or V.
    """
    semester = os.path.basename(semester_path)
    courses = load_json(semester_path + "/outputs/courses.json")
    scales = load_json(semester_path + "/outputs/scales.json")
    stats_path = semester_path + "/outputs/stats/"
    summaries_path = semester_path + "/outputs/web/converted"
    upload_path = semester_path + "/outputs/web/upload/" + semester
    # Page templates keyed by language/variant.
    html_templates = {}
    with open("./resources/web/course-no.html", 'r') as f:
        html_templates["NO"] = f.read()
    with open("./resources/web/course-en.html", 'r') as f:
        html_templates["EN"] = f.read()
    with open("./resources/web/semester-index.html", 'r') as f:
        html_templates["index"] = f.read()
    with open("./resources/web/semester-index-eng.html", 'r') as f:
        html_templates["index-eng"] = f.read()
    # OrderedDict keeps the course listing in file order.
    with open("./data/courses.json", 'r') as f:
        courses_all = json.load(f, object_pairs_hook=OrderedDict)
    links = []
    links.append('<ul class="fui_courses">')
    for course_code in courses:
        summary_path = path_join(summaries_path, course_code + ".html")
        stat_path = path_join(stats_path, course_code + ".json")
        output_path = path_join(upload_path, course_code + ".html")
        res = web_report_course(summary_path, stat_path, output_path,
                                html_templates, courses_all, scales, semester)
        # Only link courses whose report was actually produced.
        if res:
            course_name = courses[course_code]["course"]["name"]
            links.append('<li><a href="' + course_code + '.html">' +
                         course_code + ' - ' + course_name + '</a></li>')
    links.append("</ul>")
    links_str = "\n".join(links)
    # Semester names look like H2016 / V2017: season letter + year.
    letter, year = semester[0], semester[1:]
    title = {}
    if letter == "H":
        title["NO"] = "Høst " + year
        title["EN"] = "Fall " + year
    elif letter == "V":
        title["NO"] = "Vår " + year
        title["EN"] = "Spring " + year
    else:
        print("Error: unknown semester format: " + semester)
        sys.exit(1)
    links_str_no = "<h2>{}</h2>".format(title["NO"]) + links_str
    links_str_en = "<h2>{}</h2>".format(title["EN"]) + links_str
    # Substitute placeholders in both index templates.
    index_html = html_templates["index"].replace("$COURSE_INDEX", links_str_no)
    index_eng_html = html_templates["index-eng"].replace(
        "$COURSE_INDEX", links_str_en)
    index_html = index_html.replace("$SEMESTER", semester)
    index_eng_html = index_eng_html.replace("$SEMESTER", semester)
    with open(upload_path + "/index.html", "w") as f:
        f.write(index_html)
    with open(upload_path + "/index-eng.html", "w") as f:
        f.write(index_eng_html)
def web_reports_semester_folder(semester_path):
    """Generate the uploadable web report for one semester folder.

    Renders a per-course HTML page for every course in
    outputs/courses.json and writes Norwegian and English index pages
    into outputs/web/upload/<semester>. Exits with status 1 when the
    semester name does not start with H or V.
    """
    semester = os.path.basename(semester_path)
    courses = load_json(semester_path + "/outputs/courses.json")
    scales = load_json(semester_path + "/outputs/scales.json")
    stats_path = semester_path + "/outputs/stats/"
    summaries_path = semester_path + "/outputs/web/converted"
    upload_path = semester_path + "/outputs/web/upload/" + semester

    # Load the page templates, keyed by language/variant.
    template_files = {
        "NO": "./resources/web/course-no.html",
        "EN": "./resources/web/course-en.html",
        "index": "./resources/web/semester-index.html",
        "index-eng": "./resources/web/semester-index-eng.html",
    }
    html_templates = {}
    for key, template_path in template_files.items():
        with open(template_path, 'r') as f:
            html_templates[key] = f.read()

    # OrderedDict keeps the course listing in file order.
    with open("./data/courses.json", 'r') as f:
        courses_all = json.load(f, object_pairs_hook=OrderedDict)

    links = ['<ul class="fui_courses">']
    for course_code in courses:
        summary_path = path_join(summaries_path, course_code + ".html")
        stat_path = path_join(stats_path, course_code + ".json")
        output_path = path_join(upload_path, course_code + ".html")
        report_written = web_report_course(summary_path, stat_path,
                                           output_path, html_templates,
                                           courses_all, scales, semester)
        # Only link courses whose report was actually produced.
        if report_written:
            course_name = courses[course_code]["course"]["name"]
            links.append('<li><a href="' + course_code + '.html">' +
                         course_code + ' - ' + course_name + '</a></li>')
    links.append("</ul>")
    links_str = "\n".join(links)

    # Semester names look like H2016 / V2017: season letter + year.
    letter, year = semester[0], semester[1:]
    season_names = {"H": ("Høst", "Fall"), "V": ("Vår", "Spring")}
    if letter not in season_names:
        print("Error: unknown semester format: " + semester)
        sys.exit(1)
    no_season, en_season = season_names[letter]
    title = {"NO": no_season + " " + year, "EN": en_season + " " + year}

    links_str_no = "<h2>{}</h2>".format(title["NO"]) + links_str
    links_str_en = "<h2>{}</h2>".format(title["EN"]) + links_str
    index_html = html_templates["index"].replace("$COURSE_INDEX", links_str_no)
    index_eng_html = html_templates["index-eng"].replace("$COURSE_INDEX",
                                                         links_str_en)
    index_html = index_html.replace("$SEMESTER", semester)
    index_eng_html = index_eng_html.replace("$SEMESTER", semester)
    with open(upload_path + "/index.html", "w") as f:
        f.write(index_html)
    with open(upload_path + "/index-eng.html", "w") as f:
        f.write(index_eng_html)
def main():
    """Sort downloaded report files into per-semester folders.

    Each file under args.input is matched against a semester pattern
    (e.g. V2016) and a course-code pattern, then moved (args.delete) or
    copied to <output>/<semester>/downloads/<ext-dir>/<course><ext>.
    When moving, empty source directories are pruned afterwards.
    """
    args = get_args()
    delete = args.delete
    exclude_pattern = re.compile(args.exclude)
    semester_pattern = re.compile(r'(V|H)[0-9]{4}')
    course_code_pattern = re.compile(
        r'(([A-Z]{1,5}-){0,1}[A-Z]{1,5}[0-9]{3,4})([A-Z]{1,5}){0,1}')
    for root, subdirs, files in os.walk(args.input):
        for file_x in files:
            path = path_join(root, file_x)
            extension = os.path.splitext(path)[1]
            # NOTE(review): path[0] == "." tests the whole joined path, so an
            # input like "./downloads" excludes everything -- possibly meant
            # to test file_x (hidden files) instead; confirm before changing.
            if exclude_pattern.search(path) is not None or path[0] == ".":
                print("Excluded: " + path)
                continue
            m = semester_pattern.search(path)
            if m is None:
                print("Skipped - No semester: " + path)
                continue
            semester = m.group(0)
            m = course_code_pattern.search(path)
            if m is None:
                print("Skipped - No course code: " + path)
                continue
            course = m.group(0)
            # JSON files are participation stats; other extensions get a
            # folder named after the extension itself.
            dir_name = extension[1:]
            if dir_name == "json":
                dir_name = "participation"
            target_folder = path_join(args.output, semester, "downloads",
                                      dir_name)
            os.makedirs(target_folder, exist_ok=True)
            newpath = path_join(target_folder, course + extension)
            if delete:
                # On Windows os.rename fails when the target exists, so
                # remove it first; a missing target is fine.
                try:
                    os.remove(newpath)
                except OSError:
                    pass
                os.rename(path, newpath)
            else:
                copyfile(path, newpath)
            if args.verbose:
                print(path)
                print(" -> " + newpath)
                print(root)
    # Repeatedly prune empty directories until a full pass removes nothing
    # (removing a leaf can make its parent empty).
    while delete:
        delete = False
        for root, subdirs, files in os.walk(args.input):
            if len(subdirs) == 0 and len(files) == 0:
                os.rmdir(root)
                if args.verbose:
                    # Bug fix: log the directory actually removed; the old
                    # code printed a stale 'path' left over from the move loop.
                    print("rm: " + root)
                delete = True
def write_to_file(folder, name, extension, content):
    """Write content (UTF-8 text) to <folder>/<name>.<extension>.

    The folder is created if it does not exist; the cleaned path is
    produced by path_clean. Overwrites any existing file.
    """
    # exist_ok=True avoids the check-then-create race of the old
    # os.path.exists() + os.makedirs() pair.
    os.makedirs(folder, exist_ok=True)
    filename = path_join(folder, name) + '.' + extension
    filename = path_clean(filename)
    with open(filename, 'w', encoding="utf-8") as f:
        f.write(content)
def generate_scales(semester):
    """Build or refresh outputs/scales.json for one semester.

    Starts from the semester's existing scales file (or the bundled
    default), merges in the questions found in the semester's response
    JSON files, sorts and autofills the scales, and writes the result
    back. Exits with status 1 when the result is inconsistent.
    """
    scales = OrderedDict()
    scales_path = "./data/"+semester+"/outputs/scales.json"
    default_scales_path = "./resources/scales.json"
    if not os.path.exists(scales_path):
        scales = load_json(default_scales_path)
    else:
        scales = load_json(scales_path)
    if not scales:
        # Seed an empty scales file with a placeholder example question.
        scales = OrderedDict()
        q = "Remove this example question - How do you rate the course in general? (Add questions like this)"
        scales[q] = OrderedDict()
    convert_answer_case(scales)
    responses_path = "./data/"+semester+"/outputs/responses/"
    for (dirpath, dirnames, filenames) in os.walk(responses_path):
        for filename in filenames:
            if filename.endswith(".json"):
                file_path = path_join(dirpath,filename)
                scales_add_course(file_path, scales)
        break  # only the top-level responses directory is scanned
    default_sort_scales(scales)
    try:
        autofill_scales(scales)
    except AutofillException:
        # Autofill needs manual input: save what we have and prompt/exit.
        save_prompt_exit(scales, scales_path)
    dump_json(scales, scales_path)
    if print_error_check(scales):
        print("One or more inconsistency detected in " + scales_path)
        print("You will have to edit the file manually to add/edit/remove questions.")
        sys.exit(1)
def generate_stats_dir(responses_dir, participation_dir, output_dir, scales,
                       course_names, semester_name):
    """Generate a stats file for every response JSON in responses_dir.

    For each <course>.json response file, looks up the course name (or
    "Unknown" with a warning), builds the course metadata dict, and
    delegates to generate_stats_file with the matching participation
    and output paths.
    """
    for filename in os.listdir(responses_dir):
        # Bug fix: use endswith instead of a substring test so names that
        # merely contain ".json" (e.g. "X.json.bak") are not picked up.
        # This also matches the .json filter used elsewhere in this file.
        if not filename.endswith(".json"):
            continue
        course = OrderedDict()
        course_code = os.path.splitext(filename)[0]
        try:
            course_name = course_names[course_code]
        except KeyError:
            course_name = "Unknown"
            print("Warning: could not find name for course " + course_code)
        course["code"] = course_code
        course["name"] = course_name
        course["semester"] = semester_name
        responses_path = path_join(responses_dir, filename)
        participation_path = path_join(participation_dir, filename)
        output_path = path_join(output_dir, filename)
        generate_stats_file(responses_path, participation_path, output_path,
                            scales, course)
def generate_stats_dir(responses_dir, participation_dir, output_dir, scales,
                       course_names, semester_name):
    """Generate a stats file for every response JSON in responses_dir.

    For each <course>.json response file, looks up the course name (or
    "Unknown" with a warning), builds the course metadata dict, and
    delegates to generate_stats_file with the matching participation
    and output paths.
    """
    for filename in os.listdir(responses_dir):
        # NOTE(review): substring test also matches names like "X.json.bak";
        # endswith(".json") is probably the intent -- confirm.
        if ".json" in filename:
            course = OrderedDict()
            course_code = os.path.splitext(filename)[0]
            try:
                course_name = course_names[course_code]
            except KeyError:
                course_name = "Unknown"
                print("Warning: could not find name for course " + course_code)
            course["code"] = course_code
            course["name"] = course_name
            course["semester"] = semester_name
            responses_path = path_join(responses_dir, filename)
            participation_path = path_join(participation_dir, filename)
            output_path = path_join(output_dir, filename)
            generate_stats_file(responses_path, participation_path,
                                output_path, scales, course)
def generate_scales(semester):
    """Build or refresh outputs/scales.json for one semester.

    Starts from the semester's existing scales file (or the bundled
    default), merges in the questions found in the semester's response
    JSON files, sorts and autofills the scales, and writes the result
    back. Exits with status 1 when the result is inconsistent.
    """
    scales_path = "./data/" + semester + "/outputs/scales.json"
    default_scales_path = "./resources/scales.json"
    source_path = scales_path if os.path.exists(scales_path) else default_scales_path
    scales = load_json(source_path)
    if not scales:
        # Seed an empty scales file with a placeholder example question.
        scales = OrderedDict()
        q = ("Remove this example question - How do you rate the course "
             "in general? (Add questions like this)")
        scales[q] = OrderedDict()
    convert_answer_case(scales)
    responses_path = "./data/" + semester + "/outputs/responses/"
    for dirpath, _dirnames, filenames in os.walk(responses_path):
        json_names = [name for name in filenames if name.endswith(".json")]
        for name in json_names:
            scales_add_course(path_join(dirpath, name), scales)
        break  # only the top-level responses directory is scanned
    default_sort_scales(scales)
    try:
        autofill_scales(scales)
    except AutofillException:
        # Autofill needs manual input: save what we have and prompt/exit.
        save_prompt_exit(scales, scales_path)
    dump_json(scales, scales_path)
    if print_error_check(scales):
        print("One or more inconsistency detected in " + scales_path)
        print(
            "You will have to edit the file manually to add/edit/remove questions."
        )
        sys.exit(1)
# NOTE(review): this chunk begins mid-function -- the statements below are the
# tail of generate_stats_dir, whose 'def' line lies outside this view.
        output_path = path_join(output_dir, filename)
        generate_stats_file(responses_path, participation_path, output_path,
                            scales, course)

def generate_stats_semester(semester_path, semester_name):
    """Generate stats for one semester folder.

    Loads the semester's scales and the global course-name table, then
    runs generate_stats_dir over the semester's responses/participation
    into outputs/stats.
    """
    scales_path = semester_path + "/outputs/scales.json"
    scales = load_json(scales_path)
    course_names = load_json("./resources/course_names/all.json")
    generate_stats_dir(semester_path + "/outputs/responses",
                       semester_path + "/downloads/participation",
                       semester_path + "/outputs/stats",
                       scales, course_names, semester_name)

if __name__ == '__main__':
    # Usage: script <data-dir>; each top-level subfolder (without a dot in
    # its name) is treated as a semester.
    if len(sys.argv) == 1 or not os.path.isdir(sys.argv[1]):
        sys.exit("Must specify dir")
    directory = sys.argv[1]
    semester_dirs = []
    for (root, dirs, files) in os.walk(directory):
        for d in dirs:
            if "." not in d:
                semester_dirs.append(path_join(root, d))
                # TODO: Move this somewhere else:
                os.makedirs(path_join(root, d, "inputs", "md"), exist_ok=True)
                os.makedirs(path_join(root, d, "inputs", "tex"), exist_ok=True)
        break  # only the top level of the given directory is scanned
    for d in semester_dirs:
        generate_stats_semester(d, os.path.basename(d))
def download_files(driver, args):
    """Download survey results (tsv/html/stats) for each nettskjema form.

    Uses the logged-in selenium driver to list forms, then reuses the
    browser's session cookies with requests for the actual downloads.
    Form ids recorded in downloaded.txt are skipped, and each completed
    form is appended there so reruns resume where they left off.
    """
    downloaded = read_list(args.out + "/downloaded.txt")
    # Cached (name, url) list; scraped from the site only on first run.
    formdata = read_binary(args.out + "/formdata.dat")
    if not formdata:
        driver.get('https://nettskjema.uio.no/user/form/list.html')
        forms = driver.find_elements_by_css_selector('.forms .formName')
        formdata = [(form.text, form.get_attribute('href')) for form in forms]
        write_binary(args.out + "/formdata.dat", formdata)
    if args.filter:
        # Keep only forms whose name contains the filter substring.
        filtered = [x for x in formdata if args.filter in x[0]]
        print('Filter matched {} of {} forms'.format(len(filtered),
                                                     len(formdata)))
        formdata = filtered
    # Copy the browser's cookies so requests can fetch authenticated URLs.
    session = requests.Session()
    cookies = driver.get_cookies()
    for cookie in cookies:
        session.cookies.set(cookie['name'], cookie['value'])
    out_path = path_clean(args.out)
    tsv_path = path_join(out_path, 'tsv')
    html_path = path_join(out_path, 'html')
    stats_path = path_join(out_path, 'stats')
    for (name, url) in formdata:
        form_id = get_id(url)
        # The try only guards the prints: a non-UTF-8 terminal can raise
        # UnicodeEncodeError when printing the form name.
        try:
            if form_id in downloaded:
                print("Skipping {} (id={})".format(name, form_id))
                continue
            print("Fetching {} (id={})".format(name, form_id))
        except UnicodeEncodeError as e:
            # NOTE: This error can be fixed by using os_encode on name,
            # however I think it is useful to force windows users
            # to change to utf-8, just in case wrong encoding
            # causes problems elsewhere.
            error_msg = "\n".join([
                "Form id={}".format(form_id),
                "Form name: {}".format(os_encode(name)),
                "Your terminal probably doesn't like unicode.",
                "To fix this on windows, change codepage using this command:",
                "chcp 65001"
            ])
            error(error_msg, e, label="Non-unicode codepage")
        results_url = url.replace('preview', 'results')
        driver.get(results_url)
        # Submission counters scraped from the form's results page.
        stats = {
            'answered': try_to_find_int(driver,
                                        '.delivered-submissions .number'),
            'started': try_to_find_int(driver, '.saved-submissions .number'),
            'invited': try_to_find_int(driver, '.valid-invitations .number')
        }
        name_cleaned = filename_clean(name)
        if args.tsv:
            tsv_url = url.replace('preview', 'download') + '&encoding=utf-8'
            response = session.get(tsv_url)
            write_to_file(tsv_path, name_cleaned, 'tsv', response.text)
        if args.html:
            html_url = url.replace(
                'preview', 'report/web') + '&include-open=1&remove-profile=1'
            response = session.get(html_url)
            write_to_file(html_path, name_cleaned, 'html',
                          render_html(name, stats, response.text))
        if args.stats:
            stats_json = json.dumps(stats)
            write_to_file(stats_path, name_cleaned, 'json', stats_json)
        # Record success so a rerun skips this form.
        with open(args.out + "/downloaded.txt", 'a') as f:
            f.write(form_id + "\n")
# NOTE(review): this chunk begins mid-function -- the statements below are the
# tail of generate_stats_dir, whose 'def' line lies outside this view.
            course["semester"] = semester_name
            responses_path = path_join(responses_dir, filename)
            participation_path = path_join(participation_dir, filename)
            output_path = path_join(output_dir, filename)
            generate_stats_file(responses_path, participation_path,
                                output_path, scales, course)

def generate_stats_semester(semester_path, semester_name):
    """Generate stats for one semester folder.

    Loads the semester's scales and the global course-name table, then
    runs generate_stats_dir over the semester's responses/participation
    into outputs/stats.
    """
    scales_path = semester_path + "/outputs/scales.json"
    scales = load_json(scales_path)
    course_names = load_json("./resources/course_names/all.json")
    generate_stats_dir(semester_path + "/outputs/responses",
                       semester_path + "/downloads/participation",
                       semester_path + "/outputs/stats",
                       scales, course_names, semester_name)

if __name__ == '__main__':
    # Usage: script <data-dir>; each top-level subfolder (without a dot in
    # its name) is treated as a semester.
    if len(sys.argv) == 1 or not os.path.isdir(sys.argv[1]):
        sys.exit("Must specify dir")
    directory = sys.argv[1]
    semester_dirs = []
    for (root, dirs, files) in os.walk(directory):
        for d in dirs:
            if "." not in d:
                semester_dirs.append(path_join(root, d))
                # TODO: Move this somewhere else:
                os.makedirs(path_join(root, d, "inputs", "md"), exist_ok=True)
                os.makedirs(path_join(root, d, "inputs", "tex"), exist_ok=True)
        break  # only the top level of the given directory is scanned
    for d in semester_dirs:
        generate_stats_semester(d, os.path.basename(d))
def download_files(driver, args):
    """Download survey results (tsv/html/stats) for each nettskjema form.

    Uses the logged-in selenium driver to list forms, then reuses the
    browser's session cookies with requests for the actual downloads.
    Form ids recorded in downloaded.txt are skipped, and each completed
    form is appended there so reruns resume where they left off.
    """
    downloaded = read_list(args.out + "/downloaded.txt")
    # Cached (name, url) list; scraped from the site only on first run.
    formdata = read_binary(args.out + "/formdata.dat")
    if not formdata:
        driver.get('https://nettskjema.uio.no/user/form/list.html')
        forms = driver.find_elements_by_css_selector('.forms .formName')
        formdata = [(form.text, form.get_attribute('href')) for form in forms]
        write_binary(args.out + "/formdata.dat", formdata)
    if args.filter:
        # Keep only forms whose name contains the filter substring.
        filtered = [x for x in formdata if args.filter in x[0]]
        print('Filter matched {} of {} forms'.format(len(filtered),
                                                     len(formdata)))
        formdata = filtered
    # Copy the browser's cookies so requests can fetch authenticated URLs.
    session = requests.Session()
    cookies = driver.get_cookies()
    for cookie in cookies:
        session.cookies.set(cookie['name'], cookie['value'])
    out_path = path_clean(args.out)
    tsv_path = path_join(out_path, 'tsv')
    html_path = path_join(out_path, 'html')
    stats_path = path_join(out_path, 'stats')
    for (name, url) in formdata:
        form_id = get_id(url)
        # The try only guards the prints: a non-UTF-8 terminal can raise
        # UnicodeEncodeError when printing the form name.
        try:
            if form_id in downloaded:
                print("Skipping {} (id={})".format(name, form_id))
                continue
            print("Fetching {} (id={})".format(name, form_id))
        except UnicodeEncodeError as e:
            # NOTE: This error can be fixed by using os_encode on name,
            # however I think it is useful to force windows users
            # to change to utf-8, just in case wrong encoding
            # causes problems elsewhere.
            error_msg = "\n".join([
                "Form id={}".format(form_id),
                "Form name: {}".format(os_encode(name)),
                "Your terminal probably doesn't like unicode.",
                "To fix this on windows, change codepage using this command:",
                "chcp 65001"
            ])
            error(error_msg, e, label="Non-unicode codepage")
        results_url = url.replace('preview', 'results')
        driver.get(results_url)
        # Submission counters scraped from the form's results page.
        stats = {
            'answered': try_to_find_int(driver,
                                        '.delivered-submissions .number'),
            'started': try_to_find_int(driver, '.saved-submissions .number'),
            'invited': try_to_find_int(driver, '.valid-invitations .number')
        }
        name_cleaned = filename_clean(name)
        if args.tsv:
            tsv_url = url.replace('preview', 'download') + '&encoding=utf-8'
            response = session.get(tsv_url)
            write_to_file(tsv_path, name_cleaned, 'tsv', response.text)
        if args.html:
            html_url = url.replace('preview',
                                   'report/web') + '&include-open=1&remove-profile=1'
            response = session.get(html_url)
            write_to_file(html_path, name_cleaned, 'html',
                          render_html(name, stats, response.text))
        if args.stats:
            stats_json = json.dumps(stats)
            write_to_file(stats_path, name_cleaned, 'json', stats_json)
        # Record success so a rerun skips this form.
        with open(args.out + "/downloaded.txt", 'a') as f:
            f.write(form_id + "\n")