Пример #1
0
def parse_tsv_files(input_path, output_dir):
    """Convert one tsv file, or every tsv file below a directory, to json.

    Args:
        input_path: Path to a single file, or to a directory that is walked
            recursively for files whose name ends in ``.tsv``.
        output_dir: Directory that receives one ``<coursename>.json`` per
            input file; it is created when it does not exist yet.

    Raises:
        SystemExit: If ``input_path`` does not exist, or ``output_dir`` is an
            existing regular file.
    """
    if not os.path.exists(input_path):
        sys.exit("Error: invalid input path '{}'".format(input_path))

    if os.path.isfile(input_path):
        tsv_paths = [input_path]
    else:
        tsv_paths = [
            path_join(folder, entry)
            for folder, _subfolders, entries in os.walk(input_path)
            for entry in entries
            if entry.endswith(".tsv")
        ]

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    elif os.path.isfile(output_dir):
        sys.exit("Error: Out arg must be directory.")

    for tsv_path in tsv_paths:
        # The course name is the path with the ".tsv" text, the input path
        # and every "/" stripped out.
        course = tsv_path.replace(".tsv", "")
        course = course.replace(input_path, "").replace("/", "")
        parsed = parse_course_tsv(tsv_path)
        dump_json(parsed, path_join(output_dir, course) + ".json")
Пример #2
0
def get_args():
    """Parse the command line for the tsv-to-json response parser.

    Behaviour depends on ``--semester`` (default ``"all"``):

    * ``all``: run ``fui_kk/responses.py -s <dir>`` once for every
      non-hidden sub-directory of ``data`` and then exit.
    * any other non-empty value: derive ``input`` and ``output`` from the
      semester name, overriding ``--input``/``--output``.
    * empty string: use ``--input``/``--output`` exactly as given.

    Returns:
        argparse.Namespace: The parsed arguments with ``input`` and
        ``output`` filled in.

    Raises:
        SystemExit: After the ``all`` run has finished, when a ``data``
            sub-directory is itself named ``all``, or when input/output
            cannot be determined.
    """
    # Imported locally so this function does not rely on a module-level
    # import being present.
    import subprocess

    argparser = argparse.ArgumentParser(
        description="Parse tsv file(s) and output json course data")
    argparser.add_argument("--output", "-o", help="Output dir", type=str)
    argparser.add_argument("--input", "-i", help="Input dir/file", type=str)
    argparser.add_argument("--semester",
                           "-s",
                           default="all",
                           help="Semester",
                           type=str)

    args = argparser.parse_args()

    if args.semester == "all":
        # os.walk yields nothing when 'data' does not exist; the default
        # keeps next() from raising StopIteration in that case.
        _, semester_dirs, _ = next(os.walk('data'), (None, [], None))
        for d in semester_dirs:
            if d.replace("/", "") == "all":
                sys.exit("Error: Recursion check failed - 'all' folder!")
            if not d.startswith("."):
                # An argument list (no shell) passes directory names with
                # spaces or shell metacharacters through verbatim.
                subprocess.run(["python3", "fui_kk/responses.py", "-s", d])
        sys.exit()
    elif args.semester:
        args.input = path_join("data", args.semester, "downloads/tsv")
        args.output = path_join("data", args.semester, "outputs/responses")

    if not args.input or not args.output:
        sys.exit(
            "Error: Specify input and output using -i and -o parameters, or semester using -s parameter"
        )
    return args
Пример #3
0
def parse_tsv_files(input_path, output_dir):
    """Parse tsv course files and dump each one as json into ``output_dir``.

    Args:
        input_path: A single file, or a directory searched recursively for
            files ending in ``.tsv``.
        output_dir: Target directory for the json files; created if missing.

    Raises:
        SystemExit: When ``input_path`` does not exist or ``output_dir``
            points at an existing regular file.
    """
    if not os.path.exists(input_path):
        sys.exit("Error: invalid input path '{}'".format(input_path))

    def _collect_sources():
        # A file is taken as-is; a directory is searched for *.tsv files.
        if os.path.isfile(input_path):
            return [input_path]
        found = []
        for folder, _, names in os.walk(input_path):
            for name in names:
                if name.endswith(".tsv"):
                    found.append(path_join(folder, name))
        return found

    sources = _collect_sources()

    output_exists = os.path.exists(output_dir)
    if output_exists and os.path.isfile(output_dir):
        sys.exit("Error: Out arg must be directory.")
    if not output_exists:
        os.makedirs(output_dir)

    for source in sources:
        # Strip ".tsv", the input path and all "/" to get the course name.
        stem = source.replace(".tsv", "").replace(input_path, "")
        stem = stem.replace("/", "")
        course_data = parse_course_tsv(source)
        target = path_join(output_dir, stem) + ".json"
        dump_json(course_data, target)
Пример #4
0
def get_args():
    """Read command-line options for converting tsv responses to json.

    ``--semester`` defaults to ``"all"``. In that mode the function starts
    ``fui_kk/responses.py -s <dir>`` for each sub-directory of ``data`` whose
    name does not start with a dot, then exits. For any other non-empty
    semester the input and output paths are derived from the semester name.
    With an empty semester the ``--input``/``--output`` values are used.

    Returns:
        argparse.Namespace: Parsed arguments; ``input`` and ``output`` are
        guaranteed to be non-empty.

    Raises:
        SystemExit: When the ``all`` run is done, when ``data`` contains a
            directory named ``all``, or when no input/output is available.
    """
    # Function-scope import keeps the block self-contained.
    import subprocess

    argparser = argparse.ArgumentParser(
                description = "Parse tsv file(s) and output json course data")
    argparser.add_argument("--output", "-o", help="Output dir", type=str)
    argparser.add_argument("--input", "-i", help="Input dir/file", type=str)
    argparser.add_argument("--semester", "-s", default="all", help="Semester", type=str)

    args = argparser.parse_args()

    if args.semester == "all":
        # A missing 'data' directory makes os.walk yield nothing; fall back
        # to an empty listing instead of letting next() raise StopIteration.
        top_level = next(os.walk('data'), (None, [], None))
        for d in top_level[1]:
            if d.replace("/", "") == "all":
                sys.exit("Error: Recursion check failed - 'all' folder!")
            if not d.startswith("."):
                # No shell involved, so the directory name cannot be split
                # or interpreted as shell syntax.
                subprocess.run(["python3", "fui_kk/responses.py", "-s", d])
        sys.exit()
    elif args.semester:
        args.input = path_join("data",args.semester,"downloads/tsv")
        args.output = path_join("data",args.semester,"outputs/responses")

    if not args.input or not args.output:
        sys.exit("Error: Specify input and output using -i and -o parameters, or semester using -s parameter")
    return args
Пример #5
0
def web_reports_semester_folder(semester_path):
    """Generate the course web reports and index pages for one semester.

    For every course in ``<semester_path>/outputs/courses.json`` the function
    calls ``web_report_course``; courses for which that call returns a truthy
    value are linked from ``index.html`` and ``index-eng.html`` in
    ``<semester_path>/outputs/web/upload/<semester>``.

    Args:
        semester_path: Path of the semester folder. Its last component is the
            semester name, a letter ``H`` or ``V`` followed by the year.

    Raises:
        SystemExit: With status 1 when the semester name starts with neither
            ``H`` nor ``V``.
    """
    # normpath removes a trailing slash; without it basename would be ""
    # and semester[0] below would raise IndexError.
    semester = os.path.basename(os.path.normpath(semester_path))
    courses = load_json(semester_path + "/outputs/courses.json")
    scales = load_json(semester_path + "/outputs/scales.json")
    stats_path = semester_path + "/outputs/stats/"
    summaries_path = semester_path + "/outputs/web/converted"
    upload_path = semester_path + "/outputs/web/upload/" + semester

    # Explicit utf-8: the pages contain non-ASCII text, and the platform
    # default encoding is not utf-8 everywhere.
    html_templates = {}
    with open("./resources/web/course-no.html", 'r', encoding="utf-8") as f:
        html_templates["NO"] = f.read()
    with open("./resources/web/course-en.html", 'r', encoding="utf-8") as f:
        html_templates["EN"] = f.read()
    with open("./resources/web/semester-index.html", 'r',
              encoding="utf-8") as f:
        html_templates["index"] = f.read()
    with open("./resources/web/semester-index-eng.html", 'r',
              encoding="utf-8") as f:
        html_templates["index-eng"] = f.read()
    with open("./data/courses.json", 'r', encoding="utf-8") as f:
        courses_all = json.load(f, object_pairs_hook=OrderedDict)

    links = ['<ul class="fui_courses">']
    for course_code in courses:
        summary_path = path_join(summaries_path, course_code + ".html")
        stat_path = path_join(stats_path, course_code + ".json")
        output_path = path_join(upload_path, course_code + ".html")

        res = web_report_course(summary_path, stat_path, output_path,
                                html_templates, courses_all, scales, semester)
        if res:
            course_name = courses[course_code]["course"]["name"]
            links.append('<li><a href="' + course_code + '.html">' +
                         course_code + ' - ' + course_name + '</a></li>')
    links.append("</ul>")
    links_str = "\n".join(links)

    letter, year = semester[0], semester[1:]
    title = {}
    if letter == "H":
        title["NO"] = "Høst " + year
        title["EN"] = "Fall " + year
    elif letter == "V":
        title["NO"] = "Vår " + year
        title["EN"] = "Spring " + year
    else:
        print("Error: unknown semester format: " + semester)
        sys.exit(1)

    links_str_no = "<h2>{}</h2>".format(title["NO"]) + links_str
    links_str_en = "<h2>{}</h2>".format(title["EN"]) + links_str
    index_html = html_templates["index"].replace("$COURSE_INDEX", links_str_no)
    index_eng_html = html_templates["index-eng"].replace(
        "$COURSE_INDEX", links_str_en)
    index_html = index_html.replace("$SEMESTER", semester)
    index_eng_html = index_eng_html.replace("$SEMESTER", semester)

    # Make sure the target folder exists even if no course report was
    # written into it (for example when there are no courses).
    os.makedirs(upload_path, exist_ok=True)
    with open(upload_path + "/index.html", "w", encoding="utf-8") as f:
        f.write(index_html)
    with open(upload_path + "/index-eng.html", "w", encoding="utf-8") as f:
        f.write(index_eng_html)
Пример #6
0
def web_reports_semester_folder(semester_path):
    """Write per-course web reports plus Norwegian and English index pages.

    Each course listed in ``<semester_path>/outputs/courses.json`` is passed
    to ``web_report_course``. Courses for which that call returns a truthy
    value get a link in the index pages, which are written to
    ``<semester_path>/outputs/web/upload/<semester>``.

    Args:
        semester_path: Semester folder path; the final path component is
            taken as the semester name (``H`` or ``V`` followed by the year).

    Raises:
        SystemExit: With status 1 if the semester name does not begin with
            ``H`` or ``V``.
    """
    # A trailing slash would make basename return "" (and semester[0] fail),
    # so normalise the path first.
    semester = os.path.basename(os.path.normpath(semester_path))
    courses = load_json(semester_path+"/outputs/courses.json")
    scales = load_json(semester_path+"/outputs/scales.json")
    stats_path = semester_path+"/outputs/stats/"
    summaries_path = semester_path+"/outputs/web/converted"
    upload_path = semester_path+"/outputs/web/upload/"+semester

    template_files = (
        ("NO", "./resources/web/course-no.html"),
        ("EN", "./resources/web/course-en.html"),
        ("index", "./resources/web/semester-index.html"),
        ("index-eng", "./resources/web/semester-index-eng.html"),
    )
    html_templates = {}
    for key, template_path in template_files:
        # utf-8 is given explicitly because the platform default encoding
        # differs between systems and the pages hold non-ASCII text.
        with open(template_path, 'r', encoding="utf-8") as f:
            html_templates[key] = f.read()
    with open("./data/courses.json", 'r', encoding="utf-8") as f:
        courses_all = json.load(f, object_pairs_hook=OrderedDict)

    links = ['<ul class="fui_courses">']
    for course_code in courses:
        summary_path = path_join(summaries_path, course_code+".html")
        stat_path = path_join(stats_path, course_code+".json")
        output_path = path_join(upload_path, course_code+".html")

        res = web_report_course(summary_path, stat_path, output_path, html_templates, courses_all, scales, semester)
        if res:
            course_name = courses[course_code]["course"]["name"]
            links.append('<li><a href="'+course_code+'.html">' + course_code + ' - ' + course_name + '</a></li>')
    links.append("</ul>")
    links_str = "\n".join(links)

    letter, year = semester[0],semester[1:]
    title = {}
    if letter == "H":
        title["NO"] = "Høst "+year
        title["EN"] = "Fall "+year
    elif letter == "V":
        title["NO"] = "Vår "+year
        title["EN"] = "Spring "+year
    else:
        print("Error: unknown semester format: " + semester)
        sys.exit(1)

    links_str_no   = "<h2>{}</h2>".format(title["NO"]) + links_str
    links_str_en   = "<h2>{}</h2>".format(title["EN"]) + links_str
    index_html     = html_templates["index"].replace("$COURSE_INDEX", links_str_no)
    index_eng_html = html_templates["index-eng"].replace("$COURSE_INDEX", links_str_en)
    index_html     = index_html.replace("$SEMESTER", semester)
    index_eng_html = index_eng_html.replace("$SEMESTER", semester)

    # The upload folder may not exist yet when no course page was written.
    os.makedirs(upload_path, exist_ok=True)
    with open(upload_path+"/index.html", "w", encoding="utf-8") as f:
        f.write(index_html)
    with open(upload_path+"/index-eng.html", "w", encoding="utf-8") as f:
        f.write(index_eng_html)
Пример #7
0
def main():
    """Sort files from ``args.input`` into per-semester download folders.

    Every file below ``args.input`` whose path contains a semester
    (``V``/``H`` plus four digits) and a course code is copied, or moved when
    ``args.delete`` is set, to
    ``<args.output>/<semester>/downloads/<extension>/<course><extension>``.
    Files with a ``.json`` extension go into a ``participation`` folder.
    Paths matching ``args.exclude`` or starting with ``.`` are skipped.

    When ``args.delete`` is set, directories left empty below ``args.input``
    (including ``args.input`` itself) are removed afterwards.
    """
    args = get_args()
    delete = args.delete
    exclude_pattern = re.compile(args.exclude)
    semester_pattern = re.compile(r'(V|H)[0-9]{4}')
    course_code_pattern = re.compile(r'(([A-Z]{1,5}-){0,1}[A-Z]{1,5}[0-9]{3,4})([A-Z]{1,5}){0,1}')
    for root, _subdirs, files in os.walk(args.input):
        for file_x in files:
            path = path_join(root, file_x)
            extension = os.path.splitext(path)[1]
            if exclude_pattern.search(path) is not None or path.startswith("."):
                print("Excluded: " + path)
                continue
            m = semester_pattern.search(path)
            if m is None:
                print("Skipped - No semester: " + path)
                continue
            semester = m.group(0)
            m = course_code_pattern.search(path)
            if m is None:
                print("Skipped - No course code: " + path)
                continue
            course = m.group(0)

            # The folder is named after the extension without its dot;
            # json files are collected under "participation" instead.
            dir_name = extension[1:]
            if dir_name == "json":
                dir_name = "participation"
            target_folder = path_join(args.output, semester, "downloads", dir_name)
            os.makedirs(target_folder, exist_ok=True)
            newpath = path_join(target_folder, course + extension)

            if delete:
                # os.replace overwrites an existing target on every platform
                # (plain os.rename fails on Windows when the target exists),
                # so no remove-and-ignore-errors step is needed.
                os.replace(path, newpath)
            else:
                copyfile(path, newpath)
            if args.verbose:
                print(path)
                print(" -> " + newpath)
                print(root)

    if delete:
        # Bottom-up walk: children are visited before their parents, so a
        # single pass removes whole chains of directories that end up empty.
        for root, _subdirs, _files in os.walk(args.input, topdown=False):
            if not os.listdir(root):
                os.rmdir(root)
                if args.verbose:
                    print("rm: " + root)
Пример #8
0
def write_to_file(folder, name, extension, content):
    """Write ``content`` as utf-8 text to ``<folder>/<name>.<extension>``.

    Args:
        folder: Target directory; created (with parents) when missing.
        name: File name without extension.
        extension: File extension without the leading dot.
        content: Text to write; an existing file is overwritten.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists
    # first and matches how directories are created elsewhere in the project.
    os.makedirs(folder, exist_ok=True)
    filename = path_join(folder, name) + '.' + extension
    filename = path_clean(filename)
    with open(filename, 'w', encoding="utf-8") as f:
        f.write(content)
Пример #9
0
def write_to_file(folder, name, extension, content):
    """Store ``content`` in the utf-8 file ``<folder>/<name>.<extension>``.

    Args:
        folder: Directory to write into; missing directories are created.
        name: Base name of the file (no extension).
        extension: Extension to append after a dot.
        content: Text written to the file, replacing any previous content.
    """
    # A separate os.path.exists check can race with another process creating
    # the folder; exist_ok=True makes the creation idempotent instead.
    os.makedirs(folder, exist_ok=True)
    filename = path_clean(path_join(folder, name) + '.' + extension)
    with open(filename, 'w', encoding="utf-8") as f:
        f.write(content)
Пример #10
0
def generate_scales(semester):
    """Create or refresh ``scales.json`` for the given semester.

    The semester's own scales file is loaded when it exists, otherwise the
    default from ``./resources``. Questions from the json files directly
    inside the semester's responses folder are added, the scales are sorted
    and autofilled, and the result is saved back to the semester's file.

    Args:
        semester: Name of the semester folder below ``./data``.

    Raises:
        SystemExit: With status 1 when ``print_error_check`` reports a
            problem in the saved scales.
    """
    scales_path = "./data/"+semester+"/outputs/scales.json"
    if os.path.exists(scales_path):
        scales = load_json(scales_path)
    else:
        scales = load_json("./resources/scales.json")

    if not scales:
        # Seed an empty file with a placeholder question the user can edit.
        q = "Remove this example question - How do you rate the course in general? (Add questions like this)"
        scales = OrderedDict([(q, OrderedDict())])

    convert_answer_case(scales)

    responses_path = "./data/"+semester+"/outputs/responses/"
    # Only the first os.walk entry is used, i.e. files directly inside the
    # responses folder; sub-folders are not visited.
    top_level = next(os.walk(responses_path), None)
    if top_level is not None:
        folder, _subfolders, entries = top_level
        for entry in entries:
            if entry.endswith(".json"):
                scales_add_course(path_join(folder, entry), scales)

    default_sort_scales(scales)
    try:
        autofill_scales(scales)
    except AutofillException:
        save_prompt_exit(scales, scales_path)
    dump_json(scales, scales_path)

    if not print_error_check(scales):
        return
    print("One or more inconsistency detected in " + scales_path)
    print("You will have to edit the file manually to add/edit/remove questions.")
    sys.exit(1)
Пример #11
0
def generate_stats_dir(responses_dir, participation_dir, output_dir, scales, course_names, semester_name):
    """Generate a stats file for every json response file in a directory.

    Args:
        responses_dir: Directory holding one ``<course code>.json`` per course.
        participation_dir: Directory with participation files of the same
            names.
        output_dir: Directory that receives stats files of the same names.
        scales: Scales object passed through to ``generate_stats_file``.
        course_names: Mapping from course code to course name; unknown codes
            get the name ``"Unknown"`` and a warning is printed.
        semester_name: Semester label stored with each course.
    """
    for filename in os.listdir(responses_dir):
        # Match on the suffix: a substring test would also pick up names
        # such as "x.json.bak".
        if not filename.endswith(".json"):
            continue
        course = OrderedDict()
        course_code = os.path.splitext(filename)[0]
        try:
            course_name = course_names[course_code]
        except KeyError:
            course_name = "Unknown"
            print("Warning: could not find name for course " + course_code)
        course["code"] = course_code
        course["name"] = course_name
        course["semester"] = semester_name
        responses_path = path_join(responses_dir,filename)
        participation_path = path_join(participation_dir,filename)
        output_path = path_join(output_dir, filename)
        generate_stats_file(responses_path, participation_path, output_path, scales, course)
Пример #12
0
def generate_stats_dir(responses_dir, participation_dir, output_dir, scales,
                       course_names, semester_name):
    """Run ``generate_stats_file`` for each json file in ``responses_dir``.

    Args:
        responses_dir: Directory with ``<course code>.json`` response files.
        participation_dir: Directory with equally named participation files.
        output_dir: Directory the equally named stats files are written to.
        scales: Scales object handed on to ``generate_stats_file``.
        course_names: Mapping of course code to name. A missing code is
            reported with a warning and named ``"Unknown"``.
        semester_name: Semester label recorded for every course.
    """
    for filename in os.listdir(responses_dir):
        # Only real ".json" files qualify; the previous substring test also
        # accepted names like "x.json.bak".
        if not filename.endswith(".json"):
            continue
        course = OrderedDict()
        course_code = os.path.splitext(filename)[0]
        try:
            course_name = course_names[course_code]
        except KeyError:
            course_name = "Unknown"
            print("Warning: could not find name for course " + course_code)
        course["code"] = course_code
        course["name"] = course_name
        course["semester"] = semester_name
        responses_path = path_join(responses_dir, filename)
        participation_path = path_join(participation_dir, filename)
        output_path = path_join(output_dir, filename)
        generate_stats_file(responses_path, participation_path,
                            output_path, scales, course)
Пример #13
0
def generate_scales(semester):
    """Build and save the answer scales of one semester.

    Loads the semester's ``scales.json`` if present (the default scales from
    ``./resources`` otherwise), merges in the questions from the json files
    located directly in the semester's responses folder, sorts and autofills
    the scales and writes them back to the semester's ``scales.json``.

    Args:
        semester: Semester folder name under ``./data``.

    Raises:
        SystemExit: With status 1 if ``print_error_check`` flags the scales.
    """
    outputs_dir = "./data/" + semester + "/outputs/"
    scales_path = outputs_dir + "scales.json"
    default_scales_path = "./resources/scales.json"

    source_path = scales_path if os.path.exists(scales_path) else default_scales_path
    scales = load_json(source_path)

    if not scales:
        # Nothing loaded: start with a single placeholder question.
        scales = OrderedDict()
        placeholder = "Remove this example question - How do you rate the course in general? (Add questions like this)"
        scales[placeholder] = OrderedDict()

    convert_answer_case(scales)

    responses_path = outputs_dir + "responses/"
    # Take only the top-level listing of the responses folder.
    for folder, _subfolders, names in os.walk(responses_path):
        json_names = [name for name in names if name.endswith(".json")]
        for json_name in json_names:
            scales_add_course(path_join(folder, json_name), scales)
        break

    default_sort_scales(scales)
    try:
        autofill_scales(scales)
    except AutofillException:
        save_prompt_exit(scales, scales_path)
    dump_json(scales, scales_path)

    has_errors = print_error_check(scales)
    if has_errors:
        print("One or more inconsistency detected in " + scales_path)
        print(
            "You will have to edit the file manually to add/edit/remove questions."
        )
        sys.exit(1)
Пример #14
0
            output_path = path_join(output_dir, filename)
            generate_stats_file(responses_path, participation_path,
                                output_path, scales, course)


def generate_stats_semester(semester_path, semester_name):
    """Generate the stats files of a single semester folder.

    Args:
        semester_path: Path of the semester folder; scales and responses are
            read from its ``outputs`` sub-folder and participation data from
            ``downloads/participation``.
        semester_name: Semester label passed on to ``generate_stats_dir``.
    """
    outputs_dir = semester_path + "/outputs"
    scales = load_json(outputs_dir + "/scales.json")
    course_names = load_json("./resources/course_names/all.json")

    responses_dir = outputs_dir + "/responses"
    participation_dir = semester_path + "/downloads/participation"
    stats_dir = outputs_dir + "/stats"
    generate_stats_dir(responses_dir, participation_dir, stats_dir, scales,
                       course_names, semester_name)


if __name__ == '__main__':
    # Usage: <script> <data directory>. Every direct sub-directory without a
    # "." in its name is treated as a semester folder.
    if len(sys.argv) == 1 or not os.path.isdir(sys.argv[1]):
        sys.exit("Must specify dir")
    directory = sys.argv[1]
    semester_dirs = []
    # Only the first os.walk entry (the top level) is inspected.
    top_level = next(os.walk(directory), None)
    if top_level is not None:
        root, dirs, files = top_level
        for d in dirs:
            if "." in d:
                continue
            semester_dirs.append(path_join(root, d))
            # TODO: Move this somewhere else:
            for input_kind in ("md", "tex"):
                os.makedirs(path_join(root, d, "inputs", input_kind),
                            exist_ok=True)
    for d in semester_dirs:
        generate_stats_semester(d, os.path.basename(d))
Пример #15
0
def download_files(driver, args):
    """Download the requested exports for every form not fetched before.

    The list of forms is read from ``<args.out>/formdata.dat`` or, when that
    yields nothing, scraped from the nettskjema form list with ``driver`` and
    cached there. Each processed form id is appended to
    ``<args.out>/downloaded.txt`` so that it is skipped on the next run.

    Args:
        driver: Browser driver used to open pages and to supply the session
            cookies (presumably a logged-in Selenium driver - confirm with
            the caller).
        args: Options object; reads ``out``, ``filter``, ``tsv``, ``html``
            and ``stats``.
    """
    downloaded_list_path = args.out + "/downloaded.txt"
    formdata_cache_path = args.out + "/formdata.dat"

    downloaded = read_list(downloaded_list_path)
    formdata = read_binary(formdata_cache_path)
    if not formdata:
        driver.get('https://nettskjema.uio.no/user/form/list.html')
        formdata = [
            (element.text, element.get_attribute('href'))
            for element in driver.find_elements_by_css_selector(
                '.forms .formName')
        ]
        write_binary(formdata_cache_path, formdata)

    if args.filter:
        matching = [entry for entry in formdata if args.filter in entry[0]]
        print('Filter matched {} of {} forms'.format(len(matching),
                                                     len(formdata)))
        formdata = matching

    # Reuse the browser's cookies for plain HTTP downloads.
    session = requests.Session()
    for cookie in driver.get_cookies():
        session.cookies.set(cookie['name'], cookie['value'])

    out_path = path_clean(args.out)
    tsv_path = path_join(out_path, 'tsv')
    html_path = path_join(out_path, 'html')
    stats_path = path_join(out_path, 'stats')

    for name, url in formdata:
        form_id = get_id(url)
        already_downloaded = form_id in downloaded
        if already_downloaded:
            progress_template = "Skipping {} (id={})"
        else:
            progress_template = "Fetching {} (id={})"

        try:
            print(progress_template.format(name, form_id))
        except UnicodeEncodeError as e:
            # NOTE: This error can be fixed by using os_encode on name,
            #       however I think it is useful to force windows users
            #       to change to utf-8, just in case wrong encoding
            #       causes problems elsewhere.
            error_lines = [
                "Form id={}".format(form_id),
                "Form name: {}".format(os_encode(name)),
                "Your terminal probably doesn't like unicode.",
                "To fix this on windows, change codepage using this command:",
                "chcp 65001"
            ]
            error("\n".join(error_lines), e, label="Non-unicode codepage")
        else:
            # Skip only when the progress line was printed successfully.
            if already_downloaded:
                continue

        driver.get(url.replace('preview', 'results'))
        stats = {
            'answered': try_to_find_int(driver,
                                        '.delivered-submissions .number'),
            'started': try_to_find_int(driver, '.saved-submissions .number'),
            'invited': try_to_find_int(driver, '.valid-invitations .number')
        }
        name_cleaned = filename_clean(name)

        if args.tsv:
            tsv_url = url.replace('preview', 'download') + '&encoding=utf-8'
            tsv_response = session.get(tsv_url)
            write_to_file(tsv_path, name_cleaned, 'tsv', tsv_response.text)

        if args.html:
            html_url = url.replace('preview', 'report/web')
            html_url += '&include-open=1&remove-profile=1'
            html_response = session.get(html_url)
            rendered = render_html(name, stats, html_response.text)
            write_to_file(html_path, name_cleaned, 'html', rendered)

        if args.stats:
            write_to_file(stats_path, name_cleaned, 'json', json.dumps(stats))

        with open(downloaded_list_path, 'a') as f:
            f.write(form_id + "\n")
Пример #16
0
            course["semester"] = semester_name
            responses_path = path_join(responses_dir,filename)
            participation_path = path_join(participation_dir,filename)
            output_path = path_join(output_dir, filename)
            generate_stats_file(responses_path, participation_path, output_path, scales, course)

def generate_stats_semester(semester_path, semester_name):
    """Produce stats for all courses of one semester.

    Args:
        semester_path: Semester folder containing ``outputs/scales.json``,
            ``outputs/responses`` and ``downloads/participation``; stats are
            written to ``outputs/stats``.
        semester_name: Semester label forwarded to ``generate_stats_dir``.
    """
    scales = load_json(semester_path+"/outputs/scales.json")
    course_names = load_json("./resources/course_names/all.json")
    sub_folders = ("/outputs/responses",
                   "/downloads/participation",
                   "/outputs/stats")
    responses_dir, participation_dir, stats_dir = [
        semester_path+sub_folder for sub_folder in sub_folders
    ]
    generate_stats_dir(responses_dir, participation_dir, stats_dir,
                       scales, course_names, semester_name)

if __name__ == '__main__':
    # Expects the data directory as first argument; each direct child whose
    # name contains no "." is processed as a semester folder.
    no_argument = len(sys.argv) == 1
    if no_argument or not os.path.isdir(sys.argv[1]):
        sys.exit("Must specify dir")
    directory = sys.argv[1]
    semester_dirs = []
    for (root, dirs, files) in os.walk(directory):
        # The loop body runs for the top level only (see the break below).
        for d in [name for name in dirs if "." not in name]:
            semester_dirs.append(path_join(root, d))
            # TODO: Move this somewhere else:
            md_dir = path_join(root,d,"inputs","md")
            os.makedirs(md_dir, exist_ok=True)
            tex_dir = path_join(root,d,"inputs","tex")
            os.makedirs(tex_dir, exist_ok=True)
        break
    for d in semester_dirs:
        generate_stats_semester(d, os.path.basename(d))
Пример #17
0
def download_files(driver, args):
    """Fetch tsv, html and stats exports for all forms not yet downloaded.

    Form names and urls come from ``<args.out>/formdata.dat``; when that file
    yields nothing they are collected from the nettskjema form list page via
    ``driver`` and stored there. After a form has been processed its id is
    appended to ``<args.out>/downloaded.txt``.

    Args:
        driver: Browser driver used for page loads and as the source of the
            session cookies (presumably an authenticated Selenium driver -
            verify against the caller).
        args: Options object providing ``out``, ``filter``, ``tsv``, ``html``
            and ``stats``.
    """
    history_file = args.out+"/downloaded.txt"
    cache_file = args.out+"/formdata.dat"
    downloaded = read_list(history_file)

    formdata = read_binary(cache_file)
    if not formdata:
        driver.get('https://nettskjema.uio.no/user/form/list.html')
        formdata = []
        for form in driver.find_elements_by_css_selector('.forms .formName'):
            formdata.append((form.text, form.get_attribute('href')))
        write_binary(cache_file, formdata)

    if args.filter:
        total = len(formdata)
        formdata = [item for item in formdata if args.filter in item[0]]
        print('Filter matched {} of {} forms'.format(len(formdata), total))

    # Copy the browser cookies into a requests session for the downloads.
    session = requests.Session()
    for cookie in driver.get_cookies():
        session.cookies.set(cookie['name'], cookie['value'])

    out_path = path_clean(args.out)
    tsv_path, html_path, stats_path = (
        path_join(out_path, 'tsv'),
        path_join(out_path, 'html'),
        path_join(out_path, 'stats'),
    )

    def export_url(target, query):
        # Export urls are the preview url with its 'preview' part swapped.
        return url.replace('preview', target) + query

    for (name, url) in formdata:
        form_id = get_id(url)

        try:
            if form_id in downloaded:
                print("Skipping {} (id={})".format(name,form_id))
                continue
            print("Fetching {} (id={})".format(name,form_id))
        except UnicodeEncodeError as e:
            # NOTE: This error can be fixed by using os_encode on name,
            #       however I think it is useful to force windows users
            #       to change to utf-8, just in case wrong encoding
            #       causes problems elsewhere.
            details = (
                "Form id={}".format(form_id),
                "Form name: {}".format(os_encode(name)),
                "Your terminal probably doesn't like unicode.",
                "To fix this on windows, change codepage using this command:",
                "chcp 65001",
            )
            error("\n".join(details), e, label="Non-unicode codepage")

        driver.get(export_url('results', ''))
        answered = try_to_find_int(driver, '.delivered-submissions .number')
        started = try_to_find_int(driver, '.saved-submissions .number')
        invited = try_to_find_int(driver, '.valid-invitations .number')
        stats = {'answered': answered, 'started': started, 'invited': invited}
        name_cleaned = filename_clean(name)

        if args.tsv:
            response = session.get(export_url('download', '&encoding=utf-8'))
            write_to_file(tsv_path, name_cleaned, 'tsv', response.text)

        if args.html:
            response = session.get(
                export_url('report/web', '&include-open=1&remove-profile=1'))
            page = render_html(name, stats, response.text)
            write_to_file(html_path, name_cleaned, 'html', page)

        if args.stats:
            stats_json = json.dumps(stats)
            write_to_file(stats_path, name_cleaned, 'json', stats_json)

        with open(history_file, 'a') as f:
            f.write(form_id+"\n")