예제 #1
0
def get_top_series(grade_level, hard_seq, score_format):
    """Load a pre-computed series CSV and return each row as a list of courses.

    The file is selected by ``grade_level``, matched case-insensitively:
    ``"freshman_10"``, ``"freshman_8"`` and ``"sfsu_seq_check"`` each have
    their own file; any other value uses the default file.

    Every cell that is read is expected to look like ``<count>_<course>``.

    Args:
        grade_level: Key selecting which series file to read.
        hard_seq: When true, each entry becomes
            ``counter_list[int(count)] + "_" + course``; otherwise only the
            course part is kept. ``counter_list`` is defined outside this
            function.
        score_format: When true, the first column of every row is skipped
            (presumably a score column -- confirm against the CSV) and
            brackets, backslashes, single quotes and spaces are stripped from
            both ends of each remaining cell.

    Returns:
        A list containing one list of strings per row of the file.

    Raises:
        IndexError: If a cell contains no ``_`` separator.
    """
    series_dir = "/Users/thomasolson/Documents/workspace/advising_revamp/series analysis/"
    series_files = {
        "freshman_10": "top1_subset_all_possible_series_10_old_cir_only_score_freshman_bonus_412add_211add.csv",
        "freshman_8": "7_25_top_1_impact_summary_seq.csv",
        "sfsu_seq_check": "SFSU_Recommended_Seq.csv",
    }
    default_file = "all_possible_series_xfer_6_scored_412add.csv"

    # Normalise once so every key is matched the same way (previously only the
    # two freshman keys were compared case-insensitively).
    file_name = series_files.get(grade_level.lower(), default_file)
    top_set = utils.list_from_file(series_dir + file_name, "\n", ",", False)

    first_col = 1 if score_format else 0
    parsed_series = []
    for series in top_set:
        temp_list = []
        for ser in series[first_col:]:
            if score_format:
                ser = ser.strip("[]\\' ")
            parts = ser.split("_")
            count, crs = parts[0], parts[1]
            if hard_seq:
                temp_list.append(counter_list[int(count)] + "_" + crs)
            else:
                temp_list.append(crs)
        parsed_series.append(temp_list)

    return parsed_series
예제 #2
0
def main_generate_student_data(name_path, core_path, elective_path):
    """Generate students from three CSV files and persist them.

    Opens a connection via ``utils.get_connection``, loads the name, elective
    and core files, builds students with ``generate_student`` and stores them
    with ``persist_students``.

    Args:
        name_path: Path of the comma-separated name file.
        core_path: Path of the comma-separated core-course file.
        elective_path: Path of the comma-separated elective-course file.
    """
    # NOTE(review): the credentials are hard-coded; consider moving them to
    # configuration.
    cnx = utils.get_connection("advisor", "passadvise", "localhost",
                               "ADVISING")
    try:
        name_data = utils.list_from_file(name_path, "\n", ",", False)
        elective_data = utils.list_from_file(elective_path, "\n", ",", False)
        core_data = utils.list_from_file(core_path, "\n", ",", False)
        # Shallow copies are handed over so the loaded lists stay untouched.
        students = generate_student(name_data, elective_data[:], core_data[:])
        persist_students(students, cnx)
    finally:
        # Close the connection even when loading or persisting fails.
        cnx.close()
예제 #3
0
def check_class_load_effect(core_path, elective_path, testers):
    """Compare mean semester grades between heavy and normal course loads.

    Courses whose fourth column is not ``"1"`` in the core or elective file
    are treated as exceptional. For every semester of every student history
    the mean grade is computed; the semester counts as a penalty semester when
    it has more than five courses, or at least five courses including an
    exceptional one. Summary statistics for both groups are printed and a box
    plot is produced through ``py.plot``.

    Args:
        core_path: Path of the comma-separated core-course file.
        elective_path: Path of the comma-separated elective-course file.
        testers: Passed to ``gather_student_history``; when truthy the output
            labels use the prefix ``"cohort"`` instead of ``"student"``.
    """
    elective_data = utils.list_from_file(elective_path, "\n", ",", False)
    core_data = utils.list_from_file(core_path, "\n", ",", False)

    # NOTE(review): the original author flagged this column check as needing
    # verification.
    exceptional_classes = [row[0] for row in core_data if row[3] != "1"]
    exceptional_classes += [row[0] for row in elective_data if row[3] != "1"]

    grade_sets_no_penalty = []
    grade_sets_penalty = []

    for hist in gather_student_history(testers):
        semesters = semester_dissolve(hist)
        for semester in semesters:
            semester_courses = semesters[semester]
            course_count = len(semester_courses)
            grade_set = [float(entry[1]) for entry in semester_courses]
            has_exceptional = any(entry[0] in exceptional_classes
                                  for entry in semester_courses)
            heavy_load = course_count > 5 or (has_exceptional
                                              and course_count >= 5)
            bucket = grade_sets_penalty if heavy_load else grade_sets_no_penalty
            bucket.append(numpy.mean(grade_set))

    prefix = "cohort" if testers else "student"

    summaries = (
        (prefix + " penalty course", grade_sets_penalty),
        (prefix + " normal course", grade_sets_no_penalty),
    )
    for heading, grades in summaries:
        print(heading)
        print("mean : %.4f " % numpy.mean(grades))
        print("median : %.4f" % numpy.median(grades))
        print("std : %.4f" % numpy.std(grades))

    normal_trace = go.Box(y=grade_sets_no_penalty,
                          name=prefix + ' normal course grades ',
                          marker=dict(color='rgb(214, 12, 140)', ))
    penalty_trace = go.Box(y=grade_sets_penalty,
                           name=prefix + ' penalty course grades',
                           marker=dict(color='rgb(0, 128, 128)', ))
    py.plot([normal_trace, penalty_trace],
            filename="feb4test_coure_load_penalty")
예제 #4
0
def check_bonus_effect(core_path, elective_path, testers):
    """Compare grades in courses taken with and without a prior bonus course.

    The sixth column of the core and elective files, when non-empty, holds a
    ``;``-separated list of bonus courses for the course named in the first
    column. Walking each student history in order, a grade for such a course
    goes to the bonus group when any of its bonus courses appeared earlier in
    that history, otherwise to the no-bonus group. Summary statistics for both
    groups are printed and a box plot is produced through ``py.plot``.

    Args:
        core_path: Path of the comma-separated core-course file.
        elective_path: Path of the comma-separated elective-course file.
        testers: Passed to ``gather_student_history``; when truthy the output
            labels use the prefix ``"cohort"`` instead of ``"student"``.
    """
    elective_data = utils.list_from_file(elective_path, "\n", ",", False)
    core_data = utils.list_from_file(core_path, "\n", ",", False)

    # Core rows are applied first, so an elective row with the same course
    # name overrides the core entry.
    bonus_class_ref = {}
    for table in (core_data, elective_data):
        for row in table:
            if row[5] != "":
                bonus_class_ref[row[0]] = row[5]

    grade_sets_no_bonus = []
    grade_sets_bonus = []

    for hist in gather_student_history(testers):
        taken_classes = []
        for datas in hist:
            course_name = datas[0]
            if course_name in bonus_class_ref:
                bonus_classes = bonus_class_ref[course_name].split(";")
                if any(b_class in taken_classes for b_class in bonus_classes):
                    grade_sets_bonus.append(float(datas[1]))
                else:
                    grade_sets_no_bonus.append(float(datas[1]))
            taken_classes.append(course_name)

    prefix = "cohort" if testers else "student"

    summaries = (
        (prefix + " bonus course", grade_sets_bonus),
        (prefix + " normal course", grade_sets_no_bonus),
    )
    for heading, grades in summaries:
        print(heading)
        print("mean : %.4f " % numpy.mean(grades))
        print("median : %.4f" % numpy.median(grades))
        print("std : %.4f" % numpy.std(grades))

    bonus_trace = go.Box(y=grade_sets_bonus,
                         name=prefix + ' grade_sets_bonus',
                         marker=dict(color='rgb(214, 12, 140)', ))
    no_bonus_trace = go.Box(y=grade_sets_no_bonus,
                            name=prefix + ' grade_sets_no_bonus',
                            marker=dict(color='rgb(0, 128, 128)', ))
    py.plot([bonus_trace, no_bonus_trace], filename="feb4tests_grade_penalty")
예제 #5
0
def run_student_vectors(core_path, elective_path, request_type, vect_type,
                        base_dir, sim_path):
    """Build a vector for every student and pass them to ``cluster``.

    Args:
        core_path: Path of the comma-separated core-course file.
        elective_path: Path of the comma-separated elective-course file.
        request_type: Forwarded to the vector builders and ``cluster``; also
            part of the similarity file name.
        vect_type: Forwarded to ``build_student_vector`` and ``cluster``; also
            part of the similarity file name.
        base_dir: Forwarded to ``cluster`` unchanged.
        sim_path: Prefix to which ``<request_type>_<vect_type>.csv`` is
            appended before it is handed to ``cluster``.
    """
    elective_data = utils.list_from_file(elective_path, "\n", ",", False)
    core_data = utils.list_from_file(core_path, "\n", ",", False)
    class_dict = build_class_key_vector(core_data, elective_data, request_type)

    student_vects = {
        student: build_student_vector(student, class_dict, request_type,
                                      vect_type)
        for student in utils.get_students_history()
    }

    sim_path += request_type + "_" + vect_type + ".csv"
    cluster(student_vects, request_type, vect_type, base_dir, sim_path)
예제 #6
0
def read_classes(**kwargs):
    """Collect course names from the core and/or elective files.

    Keyword Args:
        core_path: Optional path of the comma-separated core-course file.
        elective_path: Optional path of the comma-separated elective file.

    Returns:
        The stripped first column of every row, core rows first and elective
        rows after them. Empty when neither path is supplied.
    """
    course_list = []
    for key in ('core_path', 'elective_path'):
        if key not in kwargs:
            continue
        rows = utils.list_from_file(kwargs[key], "\n", ",", False)
        course_list.extend(row[0].strip() for row in rows)
    return course_list
예제 #7
0
def label_student_prepardness(students):
    """Flag students whose history contains courses from the unprepared list.

    Intended to run as part of initialisation. For each student, every course
    in ``course_history`` whose ``name`` is found in ``./unprepared_courses``
    contributes ``"UN_MATH"`` and/or ``"UN_PHYS"`` depending on whether the
    name contains ``"MATH"`` / ``"PHYS"``. Students with at least one label get
    ``prep_assess`` set to the labels joined by ``;`` (in sorted order, so the
    result is deterministic) and ``prep_assess_summary`` set to ``"1"``.

    Args:
        students: Iterable of student objects exposing ``course_history``;
            matching students are modified in place.

    Returns:
        The list of students that were modified.
    """
    unprepard_list = utils.list_from_file("./unprepared_courses", "\n", ",",
                                          False)
    # NOTE(review): other callers index the rows returned by list_from_file
    # with this separator; if the rows are lists, the membership test below
    # compares a course name against lists and may never match -- verify.
    subject_labels = (("MATH", "UN_MATH"), ("PHYS", "UN_PHYS"))

    change_list = []
    for student in students:
        labels = set()
        for crs in student.course_history:
            if crs.name in unprepard_list:
                labels.update(tag for subject, tag in subject_labels
                              if subject in crs.name)
        if not labels:
            continue
        # Sorting avoids the arbitrary order that popping from a set produced.
        student.prep_assess = ";".join(sorted(labels))
        student.prep_assess_summary = "1"
        change_list.append(student)
    return change_list
예제 #8
0
    def load_annotations(self, ann_file):
        """Build the per-image info list for the ids listed in ``ann_file``.

        For each id the XML file under ``<img_prefix>/Annotations`` is parsed.
        Width and height come from its ``<size>`` element, or from the JPEG
        itself when that element is missing. In the ``bboxes`` array returned
        by ``get_ann_info`` column 0 is subtracted from column 2 and column 1
        from column 3 (presumably turning corner coordinates into width and
        height -- confirm against ``get_ann_info``).

        Args:
            ann_file: File listing image ids, read with ``list_from_file``.

        Returns:
            A list of dicts with the keys ``id``, ``filename``, ``width``,
            ``height`` and ``ann``.
        """
        data_infos = []
        for img_id in list_from_file(ann_file):
            filename = f'JPEGImages/{img_id}.jpg'
            xml_path = osp.join(self.img_prefix, 'Annotations',
                                f'{img_id}.xml')
            root = ET.parse(xml_path).getroot()
            size = root.find('size')
            if size is not None:
                width = int(size.find('width').text)
                height = int(size.find('height').text)
            else:
                img_path = osp.join(self.img_prefix, 'JPEGImages',
                                    f'{img_id}.jpg')
                # Close the image file as soon as its size has been read.
                with Image.open(img_path) as img:
                    width, height = img.size
            ann = self.get_ann_info(root)
            ann['bboxes'][:, 2] = ann['bboxes'][:, 2] - ann['bboxes'][:, 0]
            ann['bboxes'][:, 3] = ann['bboxes'][:, 3] - ann['bboxes'][:, 1]
            data_infos.append(
                dict(id=img_id,
                     filename=filename,
                     width=width,
                     height=height,
                     ann=ann))

        return data_infos
예제 #9
0
def import_students_dict():
    """Load the SFSU student CSV and build the student dictionary.

    The raw rows are handed to ``it.build_student_dict`` together with the
    column index of every field it should read.

    Returns:
        Whatever ``it.build_student_dict`` returns for the loaded rows.
    """
    source_csv = "/Users/thomasolson/Documents/workspace/advising_revamp/sfsu_data_v2.csv"
    # Field name -> column index in the CSV, in the order originally passed.
    column_index = {
        "student_id": 0,
        "sex": 4,
        "ethnic": 5,
        "age": 6,
        "resident_status": 7,
        "standing": 8,
        "entry_major": 13,
        "final_major": 33,
        "spring_19_major": 37,
        "admin_descript": 9,
        "crs_abbr": 15,
        "crs_num": 16,
        "grade_str": 21,
        "year_int": 3,
        "term_gpa": 22,
        "sfsu_gpa": 23,
        "term_units": 25,
        "sfsu_units": 26,
        "grad_flag": 31,
        "spring_19_flag": 35,
        "crs_college_long": 19,
        "crs_dept_long": 20,
        "total_units": 27,
    }
    # NOTE(review): the final True differs from the other list_from_file calls
    # (presumably a header flag -- confirm in utils).
    students_raw = utils.list_from_file(source_csv, "\n", ",", True)
    return it.build_student_dict(students_raw, **column_index)
예제 #10
0
def get_class_prereqs():
    """Read ``./cs_prereqs`` and map each course to its prerequisite list.

    The second column of a row is split on ``^`` when it contains one,
    otherwise on ``;``.

    Returns:
        A dict from course name (first column) to a list of prerequisite
        strings.
    """
    prereq_cat = {}
    for row in utils.list_from_file("./cs_prereqs", "\n", ",", False):
        course, raw_prereqs = row[0], row[1]
        delimiter = "^" if "^" in raw_prereqs else ";"
        prereq_cat[course] = raw_prereqs.split(delimiter)
    return prereq_cat
예제 #11
0
def precompute_sim(core_path, elective_path, request_type, vect_type, outpath):
    """Write the pairwise distance between all students to ``outpath``.

    Each student's vector (from ``build_student_vector``) is converted with
    ``utils.grade_vect_to_bit``. For every unordered pair of students the
    value ``1.0 - jaccard_similarity_score(a, b)`` is stored as the line
    ``<id_a>,<id_b>,<distance>`` and the lines are written with
    ``utils.list_to_file``.

    Args:
        core_path: Path of the comma-separated core-course file.
        elective_path: Path of the comma-separated elective-course file.
        request_type: Forwarded to the vector builders.
        vect_type: Forwarded to ``build_student_vector``.
        outpath: Destination passed to ``utils.list_to_file``.
    """
    elective_data = utils.list_from_file(elective_path, "\n", ",", False)
    core_data = utils.list_from_file(core_path, "\n", ",", False)
    class_dict = build_class_key_vector(core_data, elective_data, request_type)
    student_list = utils.get_students_history()
    student_vects = {}
    for student in student_list:
        student_vects[student] = build_student_vector(student, class_dict,
                                                      request_type, vect_type)

    # Convert every vector once up front instead of once per pair.
    # Assumes grade_vect_to_bit has no side effects that depend on call count
    # -- TODO confirm.
    bit_vects = [
        utils.grade_vect_to_bit(student_vects[student])
        for student in student_list
    ]
    id_strs = [str(student.id_num) for student in student_list]

    # NOTE(review): jaccard_similarity_score is presumably the scikit-learn
    # function, which was deprecated in 0.21 and removed in 0.23 -- confirm
    # the pinned version before upgrading.
    output = []
    for x, student_a_vect in enumerate(bit_vects):
        print(x)  # progress indicator
        for y in range(x + 1, len(bit_vects)):
            tani = 1.0 - jaccard_similarity_score(student_a_vect,
                                                  bit_vects[y])
            output.append(id_strs[x] + "," + id_strs[y] + "," + str(tani))
    utils.list_to_file(outpath, output)
from utils import list_from_file

# Replace every out-of-vocabulary token in the listed corpus files with
# '<UNK>' and write the result next to the input as '<name>_preprocessed.txt'.
vocab = list_from_file('resource/vocabulary.txt')
# Set copy of the vocabulary so each token check is a hash lookup instead of
# a scan over the whole list.
vocab_lookup = set(vocab)

files_changed = ['data/corpus_masked_train.txt']

for filename in files_changed:
    data = list_from_file(filename)
    data = [
        ' '.join(token if token in vocab_lookup else '<UNK>'
                 for token in sentence.split()) + '\n'
        for sentence in data
    ]
    # Everything before the first '.' in the path becomes the output stem.
    output_filename = filename.split('.')[0]
    output_filename += '_preprocessed.txt'
    with open(output_filename, 'w') as file:
        file.writelines(data)
예제 #13
0
from src.augmentation import BasicTextAugmentation
from utils import list_from_file
from feature_extraction import FeatureExtractor
from keras.models import load_model
from keras import backend as K
import tensorflow as tf
from keras.preprocessing.text import text_to_word_sequence
import json

# Configure the TensorFlow session used by the Keras backend before any model
# is loaded: GPU memory is allocated on demand and capped at half of the
# device memory for this process.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.5
K.tensorflow_backend.set_session(tf.Session(config=config))

# Module-level resources shared by the code below.
ta = BasicTextAugmentation()
vocab = list_from_file('resource/vocabulary.txt')
w2v_pathname = 'resource/w2v_path.txt'
# NOTE(review): 5 presumably matches the n-gram size used by BengioAugmenter
# -- confirm against FeatureExtractor.
fe = FeatureExtractor(5)
# NOTE(review): 500 is presumably the embedding dimension -- confirm against
# FeatureExtractor.set_w2v.
fe.set_w2v(w2v_pathname, 500, keep_alive=True)

class BengioAugmenter():
	def __init__(self, model_filename):
		self.gram = 5
		self.model = load_model(model_filename)

	def prob(self, sequences):
		for i in range(len(sequences)):
			if sequences[i] not in vocab:
				if sequences[i].isdigit():
					sequences[i] = '<NUM>'
				else:
예제 #14
0
#!/bin/python3
import subprocess

import utils

# Package names and PPA identifiers, loaded from the two text files through
# utils.list_from_file.
apt_apps: "list[str]" = utils.list_from_file("apt-apps.txt")
# The annotation previously read "******", which is not a type; it now matches
# the sibling list above.
apt_ppas: "list[str]" = utils.list_from_file("apt-ppas.txt")


def handle_ppas(ppas: 'list[str]'):
    """Register each PPA in ``ppas`` by running ``add-apt-repository -y``.

    The count and the list are printed first. Each entry is stripped of
    surrounding whitespace and passed to the command as a single argument, so
    an entry containing spaces is not broken into several arguments; blank
    entries are skipped.

    Args:
        ppas: Repository identifiers to add.
    """
    print(f"\nNumber of ppas: {len(ppas)}")
    print(f"List of ppas: {ppas}\n")

    for ppa in ppas:
        ppa = ppa.strip()
        if not ppa:
            # Nothing to add for an empty line.
            continue
        subprocess.run(["add-apt-repository", "-y", ppa])


def handle_apt_apps(apt_apps: 'list[str]'):
    """List and then install the given apt packages.

    Prints the package count and names, runs ``apt list`` for them, and then
    runs ``apt install -y`` with the same package names.

    Args:
        apt_apps: Names of the packages to install.
    """
    print(f"\nNumber of packages: {len(apt_apps)}")
    print(f"List of packages: {apt_apps}\n")

    list_command = ["apt", "list"]
    list_command.extend(apt_apps)
    subprocess.run(list_command)

    print("Installing apps...")

    install_command = ["apt", "install", "-y"]
    install_command.extend(apt_apps)
    subprocess.run(install_command)


subprocess.run("apt update".split())

if apt_ppas:
from src.bengio.augment import BengioAugmenter
from utils import list_from_file
from math import log10

# Compute the perplexity of a saved model over the preprocessed test corpus.
# Every window of six space-separated tokens is scored with
# BengioAugmenter.prob and the negative base-10 log probabilities are summed.
model_name = input('Model: ')
ba = BengioAugmenter('model/' + model_name)

logprob = 0
n_word = 0
val_data = list_from_file('data/corpus_masked_test_preprocessed.txt')
for sentence in val_data:
    print(sentence)
    seq = sentence.split(' ')
    if len(seq) >= 6:
        for i in range(len(seq) - 5):
            n_word += 1
            # The slice is a fresh list, so prob() may modify it freely.
            seq_query = seq[i:i + 6]
            prob = ba.prob(seq_query)
            logprob += (-1) * log10(prob)

print('Log probability', logprob)
print('Banyak kata', n_word)
if n_word:
    perplexity = 10**(logprob / n_word)
else:
    # No sentence had six or more tokens, so perplexity is undefined; report
    # NaN instead of dividing by zero.
    perplexity = float('nan')
print('Perplexity', perplexity)
예제 #16
0
                             mode='max')

        self.model.fit(x_train,
                       y_train,
                       batch_size=32,
                       epochs=max_epoch,
                       validation_data=[x_val, y_val],
                       callbacks=[mc])

        self.model = load_model(save_filename, custom_objects={'f1': f1})

    def evaluate(self, x_test, y_test):
        """Evaluate ``self.model`` on the given test data and print the result.

        Args:
            x_test: Test inputs forwarded to ``self.model.evaluate``.
            y_test: Test targets forwarded to ``self.model.evaluate``.
        """
        metrics = self.model.evaluate(x_test, y_test)
        print(metrics)


# Module-level resources: the aspect list read from resource/aspect.txt and
# its length.
aspect_list = list_from_file('resource/aspect.txt')
n_aspect = len(aspect_list)
w2v_pathname = 'resource/w2v_path.txt'
# Value handed to FeatureExtractor (presumably the maximum sequence length,
# as the name suggests -- confirm against FeatureExtractor).
max_length = 50
fe = FeatureExtractor(max_length)
# NOTE(review): 500 is presumably the embedding dimension -- confirm against
# FeatureExtractor.set_w2v.
fe.set_w2v(w2v_pathname, 500, keep_alive=True)


def prepare_feature(filename):
    with open(filename) as file:
        data = json.load(file)

    sentences = [datum['sentence'] for datum in data]
    aspects = [datum['aspect'] for datum in data]

    sequences = [text_to_word_sequence(s) for s in sentences]