Exemplo n.º 1
0
def record_model(db_name, username, passwd, host, port, features, model,
                 exp_id):
    #model = [f1_w1_value,
    #         f2_w1_value
    #         f3_w1_value
    #         f1_w2_value,
    #         f2_w2_value,
    #         f3_w2_value,
    #         ...]
    conn = sql_functions.openSQLConnectionP(db_name, username, passwd, host,
                                            port)
    sql = "INSERT INTO %s.models(longitudinal_feature_id," % db_name
    sql = sql + '''
        longitudinal_feature_week,
        longitudinal_feature_value,
        exp_id)
        VALUES (%s, %s, %s, %s)
        '''
    data = [(features[0], 0, model[0][0], exp_id)]
    week = 0
    num_features = len(features) - 1
    for i, value in enumerate(model[1:]):
        feature_idx = (i % num_features) + 1
        if feature_idx == 1 and i != 0:
            week += 1
        data.append((features[feature_idx], week, value[0], exp_id))

    cursor = conn.cursor()
    cursor.executemany(sql, data)
    cursor.close()
    conn.commit()
    conn.close()
def record_model(db_name, username, passwd, host, port, features, model, exp_id):
    # model = [f1_w1_value,
    #         f2_w1_value
    #         f3_w1_value
    #         f1_w2_value,
    #         f2_w2_value,
    #         f3_w2_value,
    #         ...]
    conn = sql_functions.openSQLConnectionP(db_name, username, passwd, host, port)
    sql = "INSERT INTO %s.models(longitudinal_feature_id," % db_name
    sql = (
        sql
        + """
        longitudinal_feature_week,
        longitudinal_feature_value,
        exp_id)
        VALUES (%s, %s, %s, %s)
        """
    )
    data = [(features[0], 0, model[0], exp_id)]
    week = 0
    num_features = len(features) - 1
    for i, value in enumerate(model[1:]):
        feature_idx = (i % num_features) + 1
        if feature_idx == 0 and i != 0:
            week += 1
        data.append((features[feature_idx], week, value, exp_id))

    cursor = conn.cursor()
    cursor.executemany(sql, data)
    cursor.close()
    conn.commit()
    conn.close()
Exemplo n.º 3
0
def record_experiment(db_name, username, passwd, host, port, lead, lag,
                      auc_train, testing_course, auc_test, p_lambda, p_epsilon,
                      exp_time_stamp):
    conn = sql_functions.openSQLConnectionP(db_name, username, passwd, host,
                                            port)
    ##returns exp_id
    sql = '''INSERT INTO `%s`.`experiments`
          (`lead`,
          `lag`,
          `auc_train`,
          `course_test_id`,
          `auc_test`,
          `parameter_lambda`,
          `parameter_epsilon`,
          `experiment_time_stamp`
          )
        VALUES (%s, %s, %s, '%s', %s, %s, %s, '%s')''' % (
        db_name, lead, lag, auc_train, testing_course, auc_test, p_lambda,
        p_epsilon, exp_time_stamp)

    cursor = conn.cursor()
    cursor.execute(sql)
    cursor.close()
    conn.commit()

    sql = "SELECT exp_id FROM `%s`.`experiments` WHERE experiment_time_stamp = '%s'" % (
        db_name, exp_time_stamp)
    cursor = conn.cursor()
    cursor.execute(sql)
    data = cursor.fetchall()
    cursor.close()
    conn.close()
    return int(max(data, key=lambda x: int(x[0]))[0])
def preprocess(dbName, userName, passwd, host, port, dirName, startDate,
               currentDate):

    # fileName, wordsToBeReplaced, wordsToReplace
    preprocessing_files = [
        #[
        #'create_longitudinal_features.sql',
        #['moocdb'],
        #[dbName]
        #],
        #[
        #'populate_longitudinal_features.py',
        #[],
        #[]
        #],

        #[
        #'create_models_table.sql',
        #['moocdb'],
        #[dbName]
        #],

        #[
        #'create_experiments_table.sql',
        #['moocdb'],
        #[dbName]
        #],
        ['create_user_longitudinal_feature_values.sql', ['moocdb'], [dbName]],
        [
            'users_populate_dropout_week.sql',
            ['START_DATE_PLACEHOLDER', 'moocdb'], [startDate, dbName]
        ]
    ]

    conn = sql_functions.openSQLConnectionP(dbName, userName, passwd, host,
                                            port)

    for fileName, toBeReplaced, replaceBy in preprocessing_files:
        if fileName[-2:] == 'py':
            print "executing: ", fileName
            sql_functions.runPythonFile(conn, conn, dirName, fileName[:-3],
                                        dbName, startDate, currentDate)
        else:
            this_file = os.path.dirname(os.path.realpath(__file__))
            fileLocation = dirName + '/' + fileName
            fileLocation = this_file + '/' + fileLocation
            newFile = sql_functions.replaceWordsInFile(fileLocation,
                                                       toBeReplaced, replaceBy)
            print "executing: ", fileName
            sql_functions.executeSQL(conn, newFile)
        conn.commit()

    sql_functions.closeSQLConnection(conn)
Exemplo n.º 5
0
def run_sql_curation_files(dbName, userName, passwd, dbHost, dbPort,preprocessing_files):
    conn = sql_functions.openSQLConnectionP(dbName, userName, passwd, dbHost,dbPort)

    for fileName, toBeReplaced, replaceBy in preprocessing_files:
        fileLocation = os.path.dirname(os.path.realpath(__file__))+'/'+ fileName
        print fileLocation
        newFile = sql_functions.replaceWordsInFile(fileLocation, toBeReplaced, replaceBy)
        print "executing: ", fileName
        sql_functions.executeSQL(conn, newFile)
        conn.commit()
        print "done"

    sql_functions.closeSQLConnection(conn)
Exemplo n.º 6
0
def run_sql_curation_files(dbName, userName, passwd, dbHost, dbPort,preprocessing_files):
    conn = sql_functions.openSQLConnectionP(dbName, userName, passwd, dbHost,dbPort)

    for fileName, toBeReplaced, replaceBy in preprocessing_files:
        fileLocation = os.path.dirname(os.path.realpath(__file__))+'/'+ fileName
        print fileLocation
        newFile = sql_functions.replaceWordsInFile(fileLocation, toBeReplaced, replaceBy)
        print "executing: ", fileName
        sql_functions.executeSQL(conn, newFile)
        conn.commit()
        print "done"

    sql_functions.closeSQLConnection(conn)
def record_experiment(
    db_name,
    username,
    passwd,
    host,
    port,
    lead,
    lag,
    auc_train,
    testing_course,
    auc_test,
    p_lambda,
    p_epsilon,
    exp_time_stamp,
):
    conn = sql_functions.openSQLConnectionP(db_name, username, passwd, host, port)
    ##returns exp_id
    sql = """INSERT INTO `%s`.`experiments`
          (`lead`,
          `lag`,
          `auc_train`,
          `course_test_id`,
          `auc_test`,
          `parameter_lambda`,
          `parameter_epsilon`,
          `experiment_time_stamp`
          )
        VALUES (%s, %s, %s, '%s', %s, %s, %s, '%s')""" % (
        db_name,
        lead,
        lag,
        auc_train,
        testing_course,
        auc_test,
        p_lambda,
        p_epsilon,
        exp_time_stamp,
    )

    cursor = conn.cursor()
    cursor.execute(sql)
    cursor.close()
    conn.commit()

    sql = "SELECT exp_id FROM `%s`.`experiments` WHERE experiment_time_stamp = '%s'" % (db_name, exp_time_stamp)
    cursor = conn.cursor()
    cursor.execute(sql)
    data = cursor.fetchall()
    cursor.close()
    conn.close()
    return int(max(data, key=lambda x: int(x[0]))[0])
Exemplo n.º 8
0
def run_dropout_prediction(userName,
                           passwd,
                           host,
                           port,
                           trainingCourse,
                           testingCourse,
                           earliest_date,
                           latest_date,
                           features,
                           weeks,
                           pred_week,
                           feat_week,
                           epsilon,
                           lamb=1):
    conn = sql.openSQLConnectionP(trainingCourse, userName, passwd, host, port)

    ############## Download course data and split into train and test sets  ######################
    training_course_threshold = 0.6
    a = Course(trainingCourse, earliest_date, latest_date, features, weeks,
               training_course_threshold, conn)

    testing_course_threshold = 0.6
    b = Course(testingCourse, earliest_date, latest_date, features, weeks,
               testing_course_threshold, conn)

    ############## Set parameters #################################################################
    n_A = 2000  # Number of samples from source domain (course)
    n_B_known = 0.6  # Percentage of sample used from target domain available
    n_B_unknown = 0.4  # Percentage of sample used from target domain available
    is_FM = False
    seed = 1  # set up the randomization seed

    ############# Initialize model #################################################################
    model = initialize_model(a, b, n_A, n_B_known, n_B_unknown, pred_week,
                             feat_week, seed, is_FM)
    model.normalize_features_independently()

    ############# Test model performance ###########################################################
    auc_test = AUC_naive(model, lamb, epsilon)
    auc_train = AUC_train(model, lamb, epsilon)

    conn.close()
    return (auc_test, auc_train, model.weight)
Exemplo n.º 9
0
def run_dropout_prediction(userName,
                           passwd, host, port,
                           trainingCourse,
                           testingCourse,
                           earliest_date,
                           latest_date,
                           features,
                           weeks,
                           pred_week,
                           feat_week,
                           epsilon,
                           lamb=1):
    conn = sql.openSQLConnectionP(trainingCourse, userName, passwd, host, port)

    ############## Download course data and split into train and test sets  ######################
    training_course_threshold = 0.6
    a=Course(trainingCourse, earliest_date, latest_date, features, weeks,
            training_course_threshold, conn)

    testing_course_threshold = 0.6
    b=Course(testingCourse,  earliest_date, latest_date, features, weeks,
            testing_course_threshold, conn)

    ############## Set parameters #################################################################
    n_A=2000 # Number of samples from source domain (course)
    n_B_known=0.6 # Percentage of sample used from target domain available
    n_B_unknown=0.4  # Percentage of sample used from target domain available
    is_FM=False
    seed=1 # set up the randomization seed

    ############# Initialize model #################################################################
    model=initialize_model(a,b,n_A,n_B_known,n_B_unknown,pred_week,feat_week,seed,is_FM)
    model.normalize_features_independently()



    ############# Test model performance ###########################################################
    auc_test=AUC_naive(model,lamb,epsilon)
    auc_train = AUC_train(model,lamb, epsilon)

    conn.close()
    return (auc_test,auc_train,model.weight)
Exemplo n.º 10
0
def preprocess(dbName, userName, passwd, host, port, dirName, startDate, currentDate):


# fileName, wordsToBeReplaced, wordsToReplace
    preprocessing_files = [
        #[
         #'create_longitudinal_features.sql',
         #['moocdb'],
         #[dbName]
        #],
        #[
         #'populate_longitudinal_features.py',
         #[],
         #[]
        #],

        #[
         #'create_models_table.sql',
         #['moocdb'],
         #[dbName]
         #],

        #[
         #'create_experiments_table.sql',
         #['moocdb'],
         #[dbName]
        #],


        [
         'create_user_longitudinal_feature_values.sql',
         ['moocdb'],
         [dbName]
        ],

        [
         'users_populate_dropout_week.sql',
         ['START_DATE_PLACEHOLDER','moocdb'],
         [startDate,dbName]
        ]



    ]

    conn = sql_functions.openSQLConnectionP(dbName, userName, passwd, host,port)

    for fileName, toBeReplaced, replaceBy in preprocessing_files:
        if fileName[-2:] == 'py':
            print "executing: ", fileName
            sql_functions.runPythonFile(conn,conn,dirName,
                    fileName[:-3],dbName,startDate, currentDate)
        else:
            this_file = os.path.dirname(os.path.realpath(__file__))
            fileLocation = dirName+'/'+fileName
            fileLocation = this_file+'/'+fileLocation
            newFile = sql_functions.replaceWordsInFile(fileLocation, toBeReplaced, replaceBy)
            print "executing: ", fileName
            sql_functions.executeSQL(conn, newFile)
        conn.commit()

    sql_functions.closeSQLConnection(conn)