Exemplo n.º 1
0
def start():
    """
    Serves the recommender page.

    A GET request renders the empty page. A POST carrying a JSON body with
    the keys ``courses`` and ``section`` renders a recommendation. Any other
    request is read as a form submission whose ``section`` field selects the
    unit whose courses are listed.
    """
    # Initial website
    if request.method == "GET":
        return render_template("recommender.html",
                               units=UNITS,
                               courses=None,
                               selected_section=None,
                               recommendation=None)

    # Parse the body a single time. silent=True makes a non-JSON body come
    # back as None instead of aborting the request, which is what lets a
    # plain form post reach the section branch below.
    data = request.get_json(silent=True)

    # The user wants a recommendation
    if data:
        # unescape solves html badly formatted characters
        courses_found = [
            html.unescape(course) for course in data['courses']
        ]
        section_found = html.unescape(data['section'])
        recommendation = predict(section_found, courses_found)
        return render_template("recommender.html",
                               units=UNITS,
                               courses=None,
                               selected_section=section_found,
                               recommendation=recommendation)

    # The user selected a section
    section_found = request.form['section']
    enrolments = load_enrolment_matrix(unit_name=section_found,
                                       from_pickle=True)
    found_courses = sorted(enrolments.columns.tolist())
    return render_template("recommender.html",
                           units=UNITS,
                           courses=found_courses,
                           selected_section=section_found,
                           recommendation=None)
def load_co_enrolment_matrix(unit_name="Informatique",
                             from_pickle=False,
                             verbose=False):
    """
    Returns the co-enrolment matrix of a unit, either read back from its
    pickle file or rebuilt from the unit's enrolment matrix.

    When rebuilt, the cell of two different courses counts the rows of the
    enrolment matrix in which both are marked 1, and every column is then
    divided by its own sum.
    """
    if verbose:
        print("Loading the {} co enrolment matrix".format(unit_name))

    if from_pickle:
        pickle_name = '{}_co_enrolment_matrix.pkl'.format(UNITS[unit_name])
        return pd.read_pickle(DATA_FOLDER + pickle_name)

    enrolments = load_enrolment_matrix(unit_name,
                                       from_pickle=True,
                                       verbose=verbose)
    course_names = enrolments.columns
    pair_counts = pd.DataFrame(data=0,
                               columns=course_names,
                               index=course_names)

    for _, student in enrolments.iterrows():
        taken = student[student == 1].index.tolist()
        # Pair each course only with the ones listed after it, so that a
        # pair is counted once per row (upper triangle of the matrix)
        for position, course in enumerate(taken):
            later_courses = taken[position + 1:]
            pair_counts.loc[course, later_courses] += 1

    # Mirror the upper triangle onto the lower one
    symmetric = pair_counts + pair_counts.T

    # Scale every column by its total. Nothing is dropped here: a column
    # whose total is 0 is returned as it comes out of the division.
    column_totals = symmetric.sum(axis=0)
    return symmetric / column_totals
def training_weight_coenrolments(user_index, unit_name="Informatique"):
    """
    Builds the list of co-enrolment weights, one per course column of the
    unit's enrolment matrix, for the user stored at row `user_index`
    """
    enrolments = load_enrolment_matrix(unit_name, from_pickle=True)

    # Courses flagged with 1 on the row of this user
    user_row = enrolments.iloc[user_index]
    courses_taken = user_row[user_row == 1].index.tolist()

    weights = []
    for course in enrolments.columns.tolist():
        weights.append(get_coenrolment(course, courses_taken, unit_name))
    return weights
Exemplo n.º 4
0
def training_weight_grade_corr(user_index, unit_name="Informatique"):
    """
    Builds the list of grade correlation weights, one per course column of
    the unit's enrolment matrix, for the user stored at row `user_index`
    """
    enrolments = load_enrolment_matrix(unit_name, from_pickle=True)

    # Courses flagged with 1 on the row of this user
    user_row = enrolments.iloc[user_index]
    courses_taken = user_row[user_row == 1].index.tolist()

    weights = []
    for course in enrolments.columns.tolist():
        weights.append(get_grades_corr(course, courses_taken))
    return weights
def train_all_individual_models(dropout=0.998, hidden_layers=27, verbosity=2):
    """
    Trains one model per entry of UNITS, printing the progress as it goes.

    Each unit name is handed to train_model as its `save` argument,
    presumably so that the trained model is stored under that name for
    later loading.
    """
    total = len(UNITS)
    for position, unit in enumerate(UNITS, start=1):
        progress = "Training the model for {} ({}/{})".format(
            unit, position, total)
        print(progress)

        enrolments = load_enrolment_matrix(unit, from_pickle=True)
        train_model(enrolments,
                    dropout,
                    hidden_layers,
                    verbosity,
                    save=unit)
Exemplo n.º 6
0
def load_grade_corr_matrix(from_pickle=False):
    """
    Returns the matrix of grade correlations between courses.

    With `from_pickle` set, the matrix is read back from
    'grade_correlation_matrix.pkl'. Otherwise it is rebuilt from
    'correlation-subject-pair.csv', written to that pickle file and
    returned. Rows are the `sub2` course names; columns are the `sub1`
    course names followed by the enrolment-matrix courses that have no row
    in the correlation data. Every column is divided by its own sum.
    """
    pickle_path = DATA_FOLDER + 'grade_correlation_matrix.pkl'
    if from_pickle:
        return pd.read_pickle(pickle_path)

    # Retrieve courses correlations
    grade_corr = pd.read_csv(DATA_FOLDER + 'correlation-subject-pair.csv')
    # Work on copies so the columns added below land on independent frames
    # rather than on slices of the frame that was read
    grade_corr = grade_corr[['sub1', 'sub2', 'cor1', 'cor2']].copy()
    # The row is indexed by column label, so read the two values by label
    grade_corr['cor_mean'] = grade_corr[['cor1', 'cor2']].apply(
        lambda pair: correlation_series_mean(pair['cor1'], pair['cor2']),
        axis=1)
    grade_corr = grade_corr[['sub1', 'sub2', 'cor_mean']].copy()

    # Use SubjectName instead of SubjectID
    grade_corr['sub1_name'] = grade_corr.sub1.map(course_id_mapper)
    grade_corr['sub2_name'] = grade_corr.sub2.map(course_id_mapper)
    grade_corr = grade_corr.dropna()[['sub1_name', 'sub2_name', 'cor_mean']]

    # In case there are no correlations, we set to the mean of all of them.
    # Take the mean of the numeric column only, as a scalar: averaging the
    # whole frame would also hit the two name columns, and a scalar is what
    # fillna and np.full below need as their fill value.
    mean_correlation = grade_corr['cor_mean'].mean()

    # Let's make it a matrix
    grade_corr_matrix = grade_corr.set_index(
        ["sub1_name", "sub2_name"]).unstack(level=0).fillna(mean_correlation)
    # Map the correlations from [-1, 1] onto [0, 1]
    grade_corr_matrix = (grade_corr_matrix + 1) / 2

    # Set not found courses correlations to the mean of all correlations
    known_courses = set(grade_corr_matrix.index)
    no_corr_courses = [
        c for c in load_enrolment_matrix(from_pickle=True).columns.tolist()
        if c not in known_courses
    ]
    missing_correlations = pd.DataFrame(
        np.full(fill_value=mean_correlation,
                shape=(grade_corr_matrix.shape[0], len(no_corr_courses))),
        columns=no_corr_courses,
        index=grade_corr_matrix.index.tolist())
    # Drop the 'cor_mean' level left on the columns by unstack
    grade_corr_matrix.columns = grade_corr_matrix.columns.droplevel()
    grade_corr_matrix = pd.concat([grade_corr_matrix, missing_correlations],
                                  axis=1)

    # Let's transform it into probabilistic
    grade_corr_matrix = grade_corr_matrix / grade_corr_matrix.sum(axis=0)

    grade_corr_matrix.to_pickle(pickle_path)
    return grade_corr_matrix
def predict(unit="Informatique", courses=COURSES):
    """
    Recommends a list of courses from the ones you took or
    plan to take, and from your school unit.

    The scores predicted by the unit's stored model are multiplied by the
    co-enrolment weights of the given courses. The result is the list of at
    most 10 course names, best score first, restricted to courses present
    in last year's registrations and not already part of `courses`.
    """
    courses_matrix = load_enrolment_matrix(unit_name=unit, from_pickle=True)
    my_courses = pd.DataFrame(data=0,
                              columns=courses_matrix.columns,
                              index=[USERNAME])
    my_courses[courses] = 1
    my_row = my_courses.loc[USERNAME]
    taken_courses = my_row[my_row == 1].index.tolist()

    # DataFrame.as_matrix() no longer exists in pandas >= 1.0; .values gives
    # the same array on every pandas version
    my_binary_courses = my_courses.values
    binary_courses_format = np.array([[1]], dtype=np.int32)

    model = models.load_model(DATA_FOLDER +
                              '{}_cdae_model.hd5'.format(UNITS[unit]))
    prediction = model.predict(x=[my_binary_courses, binary_courses_format])

    # CDAE + co-enrolment
    # The weights must describe the courses of the requesting user.
    # training_weight_coenrolments(i, unit) would instead read row i of the
    # stored enrolment matrix, i.e. somebody else's courses, so the weights
    # are computed here from taken_courses with the same per-course call.
    co_enrolment_weights = np.array([
        get_coenrolment(c, taken_courses, unit)
        for c in courses_matrix.columns.tolist()
    ])
    prediction = np.array([
        co_enrolment_weights * np.array(nn_weights)
        for nn_weights in prediction
    ])
    # Ascending order of score; reversed at the very end
    prediction = np.argsort(prediction)

    predicted_courses = [courses_matrix.columns[i] for i in prediction[0]]
    last_year_courses = set(
        get_last_year_registrations(unit_name=unit, from_pickle=True).index)
    already_taken = set(taken_courses)
    predicted_courses = [
        c for c in predicted_courses
        if c in last_year_courses and c not in already_taken
    ]

    return predicted_courses[::-1][:10]