示例#1
0
def get_variable_dataf(variable_ids, ro_id, survey_id, org_id, analysis_id):
    cnx, now = connect(), currtime()
    data = []
    for variable_id in variable_ids:
        cursor = cnx.cursor(dictionary=True, buffered=True)
        try:
            cursor.callproc('GetVariableDataF', [variable_id, org_id, survey_id, ro_id, 0, ''])
        except:
            cursor_err = cnx.cursor()
            cursor_err.execute(sperror("Variable data can't be retrieved", analysis_id))
            cnx.commit()
            cursor_err.close()
            cnx.close()
            exit(1)
        for i in cursor.stored_results():
            fetched = i.fetchall()
            fetched = [('1',) if element[0] is None else element for element in fetched]
            data.append(fetched)
        cursor.close()
    cnx.close()

    cleaned = []
    for variable in data:
        ints = []
        for data_point in variable:
            if data_point is not None:
                ints.append(int(data_point[0]))
            else:
                ints.append(1)
        cleaned.append(ints)

    return cleaned
示例#2
0
def save_results_as_corr(p, percentages, survey_id, variable_ids, options, analysis_id):
    cnx, now = connect(), currtime()
    for i in range(len(options[0])):
        for j in range(len(options[1])):
            cursor1 = cnx.cursor()
            cursor2 = cnx.cursor()
            query1 = """INSERT INTO correlation_analysis_results SET analysis_id = {current_id},
                       var1 = {var1}, var2 = {var2},  var1_option = {var1_option}, var2_option = {var2_option},
                       p_value = {p_value}, percentage = {percentage}, is_chisquare = 1""".format(
                       current_id=analysis_id, var1=variable_ids[0], var2=variable_ids[1],
                       var1_option=i+1, var2_option=j+1, p_value=p, percentage=round(percentages[i][j], 2))
            query2 = """INSERT INTO correlation_analysis_results SET analysis_id = {current_id},
                                   var1 = {var2}, var2 = {var1},  var1_option = {var2_option}, var2_option = {var1_option},
                                   p_value = {p_value}, percentage = {percentage}, is_chisquare = 1""".format(
                current_id=analysis_id, var1=variable_ids[0], var2=variable_ids[1],
                var1_option=i + 1, var2_option=j + 1, p_value=p, percentage=round(percentages[i][j], 2))
            try:
                cursor1.execute(query1)
                cursor2.execute(query2)
                cnx.commit()
            except:
                exit(1)
            cursor1.close()
            cursor2.close()
    cnx.close()
    return
示例#3
0
def ro(analysis_id, variable_ids):
    cnx, now = connect(), currtime()
    result, ro_id, survey_id, org_id = select_from_analysis(analysis_id)
    update_analysis_running(analysis_id)
    delete_results(analysis_id)
    variables = get_varinfo_from_varids(variable_ids, analysis_id)
    variable_ids = variable_ids.split(',')
    options = get_options(variable_ids)
    data = get_variable_dataf(variable_ids, ro_id, survey_id, org_id, analysis_id)
    check_empty_var(data, variables, cnx)
    orders = list(range(len(variable_ids)))

    for subset in combinations(orders, 2):
        (first, second) = subset
        print(subset)
        subids = [variable_ids[i] for i in [first, second]]
        subdata = [data[i] for i in [first, second]]
        subopts = [options[i] for i in [first, second]]
        counts = find_counts(subdata, subopts)
        percentages = find_percentages(counts)
        try:
            chi2, p, dof, ex = chi2_contingency(counts, correction=False)
        except:
            cursor_err = cnx.cursor()
            message = "Algorithm has failed"
            cursor_err.execute(sperror(message, analysis_id))
            cnx.commit()
            cursor_err.close()
            exit(1)
        p = round(p, 4)
        save_results_as_corr(p, percentages, survey_id, subids, subopts, analysis_id)

    update_analysis_done(analysis_id)
    return
示例#4
0
def update_analysis_done(analysis_id):
    cnx, now = connect(), currtime()
    cursor = cnx.cursor(dictionary=True)
    try:
        cursor.execute("""UPDATE analysis SET analysis_status = 'done', error = NULL,
                       modified = '{current_timestamp}' WHERE id = {current_id}"""
                       .format(current_timestamp=now, current_id=analysis_id))
    except:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("Analysis couldn't be updated", analysis_id))
        cnx.commit()
        cursor_err.close()
        exit(1)
    cnx.close()
    return
示例#5
0
def delete_results(analysis_id):
    cnx, now = connect(), currtime()
    cursor = cnx.cursor(dictionary=True)
    try:
        cursor.execute("""DELETE FROM correlation_analysis_results 
                       WHERE analysis_id = {current_id} AND is_chisquare = 1"""
                       .format(current_id=analysis_id))
    except:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("Old results couldn't be deleted", analysis_id))
        cnx.commit()
        cursor_err.close()
        exit(1)
    cnx.close()
    return
示例#6
0
def get_varinfo_from_varids(variable_ids, analysis_id):
    cnx, now = connect(), currtime()
    cursor = cnx.cursor(dictionary=True, buffered=True)
    try:
        cursor.execute("""SELECT var_id AS id, var_type AS type, var_subtype AS subtype,
                        var_name AS name FROM var_list WHERE var_id IN ({current_var_ids})"""
                       .format(current_var_ids=variable_ids))
    except:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("A variable can't be retrieved", analysis_id))
        cnx.commit()
        cursor_err.close()
        cnx.close()
        exit(1)
    variables = cursor.fetchall()
    cursor.close()
    cnx.close()
    return variables
示例#7
0
def select_from_analysis(analysis_id):
    cnx, now = connect(), currtime()
    cursor = cnx.cursor(dictionary=True)
    try:
        cursor.execute("SELECT * FROM analysis WHERE id = {current_id}"
                       .format(current_id=analysis_id))
    except:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("Could not find analysis in database", analysis_id))
        cnx.commit()
        cursor_err.close()
        exit(1)
    result = cursor.fetchall()
    ro_id = result[0]["ro_id"]
    survey_id = result[0]["survey_id"]
    org_id = result[0]["org_id"]
    cursor.close()
    cnx.close()
    return result, ro_id, survey_id, org_id
示例#8
0
def get_options(variable_ids):
    cnx, now = connect(), currtime()
    options = []
    for variable_id in variable_ids:
        cursor = cnx.cursor(dictionary=True, buffered=True)
        if True:
            try:
                cursor.callproc('GetVariableOptions', [variable_id])
            except:
                cursor_err = cnx.cursor()
                cnx.commit()
                cursor_err.close()
                cnx.close()
                print("---")
                exit(1)
            for i in cursor.stored_results():
                options.append(i.fetchall())
        cursor.close()
    cnx.close()
    return options
示例#9
0
def get_params(analysis_id):
    result, ro_id, survey_id, org_id = select_from_analysis(analysis_id)
    cnx, now = connect(), currtime()
    cursor = cnx.cursor(dictionary=True, buffered=True)
    ro_id = result[0]["ro_id"]
    try:
        cursor.execute("""SELECT var_id AS id, var_type AS type, var_subtype AS subtype FROM ro_vars 
                        INNER JOIN var_list ON var_list.var_id = ro_vars.variable_id 
                        WHERE ro_vars.ro_id = {current_ro_id}""".format(current_ro_id=ro_id))
    except:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("A variable can't be retrieved", analysis_id))
        cnx.commit()
        cursor_err.close()
        cnx.close()
        exit(1)
    variables = cursor.fetchall()
    cursor.close()

    variable_ids = str(variables[0]["id"]) + "," + str(variables[1]["id"])

    cnx.close()
    return org_id, survey_id, variable_ids
示例#10
0
from checkwarns import checkwarns
import argparse
import json

checkwarns()

writelog = False

if not writelog:
    log = logging.getLogger()
    log.setLevel(logging.CRITICAL)

test = False

cnx = connect()
now = currtime()


# binumerical correlation
def twocorr(x, y):
    try:
        corr, p = pearsonr(x, y)
    except:
        cursor_err = cnx.cursor()
        # cursor_err.execute(sperror("Correlation can't be done", analysis_id))
        cursor_err.execute(sperror("Correlation can't be done", 1))
        cnx.commit()
        cursor_err.close()
        cnx.close()
        exit(1)
示例#11
0
def main(org_id, survey_id, variable_ids):
    if org_id == 0 or survey_id == 0 or variable_ids == "":
        exit(1)

    variable_ids = variable_ids.split(",")

    cnx = connect()
    now = currtime()

    data_raw = []

    for variable_id in variable_ids:
        cursor = cnx.cursor(dictionary=True, buffered=True)
        try:
            cursor.callproc('GetVariableDataF',
                            [variable_id, org_id, survey_id, 'FALSE', 'NULL'])
        except:
            cursor_err = cnx.cursor()
            # cursor_err.execute(sperror("Variables can't be retrieved", analysis_id))
            cnx.commit()
            cursor_err.close()
            cnx.close()
            print("-")
            exit(1)
        for i in cursor.stored_results():
            data_raw.append(i.fetchall())
        cursor.close()

    data = []
    for variable in data_raw:
        data_points = []
        for point in variable:
            try:
                to_be_appended = float(point[0])
            except:
                to_be_appended = 0.0
            data_points.append(to_be_appended)
        data.append(data_points)

    result = []
    for i in range(0, len(data)):
        for j in range(i, len(data)):
            if i != j:
                try:
                    corr, p = pearsonr(data[i], data[j])
                    if np.isnan(corr):
                        corr = 0.00
                except:
                    checkprint("ERR_IND")
                    exit(1)
                result.append({
                    "v1": variable_ids[i],
                    "v2": variable_ids[j],
                    "c": round(corr, 2)
                })

    result = json.dumps(result)
    print(result)

    cnx.close()

    return result
示例#12
0
def regression(regression_id):
    cnx = connect()
    now = currtime()

    # try starting analysis
    cursor = cnx.cursor(dictionary=True)
    try:
        cursor.execute("""UPDATE analysis SET analysis_status = 'running',
                                error = NULL,
                                modified = '{current_time}' 
                                WHERE id = {current_id}""".format(current_time=now, current_id=analysis_id))
    except:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("Analysis couldn't start", analysis_id))
        cnx.commit()
        cursor_err.close()
        cnx.close()
        exit(1)
    cnx.commit()
    cursor.close()

    # remove old results
    cursor = cnx.cursor()
    try:
        cursor.execute("""DELETE FROM regression_analysis_results 
                            WHERE analysis_id = {current_id}""".format(current_id=analysis_id))
    except:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("Can't access or modify analysis results", analysis_id))
        cnx.commit()
        cursor_err.close()
        cnx.close()
        exit(1)
    cnx.commit()
    cursor.close()

    # select given analysis id
    cursor = cnx.cursor(dictionary=True)
    try:
        cursor.execute("SELECT * FROM analysis WHERE id = {current_id} AND analysis_type = 'REGRESSION'"
                        .format(current_id = analysis_id))
    except:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("Could not find analysis in database", analysis_id))
        cnx.commit()
        cursor_err.close()
        exit(1)
    result = cursor.fetchall()
    ro_id = result[0]["ro_id"]
    survey_id = result[0]["survey_id"]
    org_id = result[0]["org_id"]
    cursor.close()

    # fetch dependent variable
    cursor = cnx.cursor(dictionary=True, buffered=True)
    try:
        cursor.execute("""SELECT var_id AS id, var_type AS type, var_subtype AS subtype FROM ro_vars
                           INNER JOIN var_list ON var_list.var_id = ro_vars.variable_id
                           WHERE ro_vars.ro_id = {current_ro_id} AND ro_vars.analysis_var_type = 'DV'"""
                           .format(current_ro_id=ro_id))
    except:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("DV can't be retrieved", analysis_id))
        cnx.commit()
        cursor_err.close()
        cnx.close()
        exit(1)
    dv = cursor.fetchall()
    cursor.close()
    checkprint(dv)

    # fetch independent variables
    ivids = []
    cursor = cnx.cursor(dictionary=True, buffered=True)
    ro_id = result[0]["ro_id"]
    try:
        cursor.execute("""SELECT var_id AS id, var_type AS type, var_subtype AS subtype FROM ro_vars
                           INNER JOIN var_list ON var_list.var_id = ro_vars.variable_id
                           WHERE ro_vars.ro_id = {current_ro_id} AND ro_vars.analysis_var_type = 'IV'"""
                           .format(current_ro_id=ro_id))
    except:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("IV can't be retrieved", analysis_id))
        cnx.commit()
        cursor_err.close()
        cnx.close()
        exit(1)
    ivs = cursor.fetchall()
    cursor.close()
    for iv in ivs:
        ivids.append(iv['id'])

    # get options for categorical independent variables
    options_iv = []
    for variable in ivs:
        cursor = cnx.cursor(dictionary=True, buffered=True)
        if True:
            try:
                cursor.callproc('GetVariableOptions', [variable["id"]])
            except:
                cursor_err = cnx.cursor()
                cursor_err.execute(sperror("Variable options can't be retrieved", analysis_id))
                cnx.commit()
                cursor_err.close()
                cnx.close()
                exit(1)
            for i in cursor.stored_results():
                options_iv.append(i.fetchall())
        cursor.close()
    checkprint(options_iv)

    # get options for categorical dependent variables
    options_dv = []
    cursor = cnx.cursor(dictionary=True, buffered=True)
    if True:
        try:
            cursor.callproc('GetVariableOptions', [dv[0]["id"]])
        except:
            cursor_err = cnx.cursor()
            cursor_err.execute(sperror("Variable options can't be retrieved", analysis_id))
            cnx.commit()
            cursor_err.close()
            cnx.close()
            exit(1)
        for i in cursor.stored_results():
            options_dv.append(i.fetchall())
    cursor.close()

    # fetch data points for independent variables
    data_raw_iv = []
    for i in range(0, len(ivs)):
        variable = ivs[i]
        cursor = cnx.cursor(dictionary=True, buffered=True)
        try:
            # cursor.callproc('GetVariableData', [variable["id"], org_id, survey_id])
            cursor.callproc('GetVariableDataF', [variable["id"], org_id, survey_id, ro_id, 0, ''])
        except:
            cursor_err = cnx.cursor()
            cursor_err.execute(sperror("IV content can't be retrieved", analysis_id))
            cnx.commit()
            cursor_err.close()
            cnx.close()
            exit(1)
        for j in cursor.stored_results():
            fetched = j.fetchall()
            # for numerical variables
            if ivs[i]['subtype'] in ['decimal', 'numeric', 'scalar', 'labeled_scalar']:
                fetched = [float(x[0]) if x[0] is not None else float(random.gauss(0, 1)) for x in fetched]
            # for categorical variables
            elif ivs[i]['subtype'] in ['single_choice', 'multiple_choice', 'text_scalar']:
                binaries = []
                for option_id, option_label in options_iv[i]:
                    binary = []
                    for point in fetched:
                        try:
                            # binarize each option for each categorical variable
                            if point[0] == option_id:
                                binary.append(1.0)
                            else:
                                binary.append(0.0)
                        except:
                            binary.append(0.0)
                    binaries.append(binary)
                fetched = binaries
            else:
                exit(1)
            data_raw_iv.append(fetched)
        cursor.close()

    # get type of the dependent variable
    dv_type = dv[0]['subtype']
    mulchoopts = []

    # fetch data points for dependent variable
    data_raw_dv = []
    for variable in dv:
        cursor = cnx.cursor(dictionary=True, buffered=True)
        try:
            # cursor.callproc('GetVariableData', [variable["id"], org_id, survey_id])
            cursor.callproc('GetVariableDataF', [variable["id"], org_id, survey_id, ro_id, 0, ''])
        except:
            cursor_err = cnx.cursor()
            cursor_err.execute(sperror("DV content can't be retrieved", analysis_id))
            cnx.commit()
            cursor_err.close()
            cnx.close()
            exit(1)
        for i in cursor.stored_results():
            fetched = i.fetchall()
        if dv_type in ['decimal', 'numeric', 'scalar', 'labeled_scalar']:
            fetched = [float(x[0]) if x[0] is not None else float(random.gauss(0, 1)) for x in fetched]
            data_raw_dv.append(fetched)
        elif dv_type in ['single_choice', 'text_scalar', 'multiple_choice']:
            for option_id, option_label in options_dv[0]:
                mulchoopts.append(option_id)
            checkprint(mulchoopts)
            for mulchoopt in mulchoopts:
                binary = []
                for point in fetched:
                    try:
                        # binarize options for categorical variables
                        if point[0] == mulchoopt:
                            binary.append(1.0)
                        else:
                            binary.append(0.0)
                    except:
                        binary.append(0.0)
                data_raw_dv.append([binary])
        else:
            exit(1)
        cursor.close()


    # start the appropriate analysis according to the type of the dependent variable
    if dv_type in ['decimal', 'numeric', 'scalar', 'labeled_scalar']:
        if len(ivids) >= len(data_raw_dv[0]):
            try:
                cursor_err = cnx.cursor()
                cursor_err.execute(sperror("ERR_TOO_FEW_DATA", analysis_id))
                cnx.commit()
                cursor_err.close()
                checkprint("error can be saved")
            except:
                checkprint("error can't be saved")
            cnx.close()
            exit(1)
        try:
            # start linear regression for numerical dv
            linear_regression(cnx, now, regression_id, ivs, ivids, dv,
                              data_raw_iv, data_raw_dv, options_iv)
        except:
            exit(1)
    elif dv_type in ['single_choice', 'text_scalar', 'multiple_choice']:
        if len(ivids) >= len(data_raw_dv[0][0]):
            try:
                cursor_err = cnx.cursor()
                cursor_err.execute(sperror("ERR_TOO_FEW_DATA", analysis_id))
                cnx.commit()
                cursor_err.close()
                checkprint("error can be saved")
            except:
                checkprint("error can't be saved")
            cnx.close()
            exit(1)
        for i in range(0, len(mulchoopts)):
            # start multiple regression for each option of categorical dv
            multi_regression(cnx, now, regression_id, ivs, ivids, dv,
                             data_raw_iv, data_raw_dv[i], options_iv, options_dv, mulchoopts[i])
    else:
        cursor_err = cnx.cursor()
        cursor_err.execute(sperror("Unknown DV type", analysis_id))
        cnx.commit()
        cursor_err.close()
        cnx.commit()
        exit(1)

    try:
        cnx.close()
    except:
        exit(1)
示例#13
0
def getdata(org_id, survey_id, variable_ids):
    if org_id == 0 or survey_id == 0 or variable_ids == "":
        exit(1)

    variable_ids = variable_ids.split(",")
    # checkprint(variable_ids)

    cnx = connect()
    now = currtime()

    # checkprint("start")

    counter = 0
    data_raw = []
    for variable_id in variable_ids:
        if not int(variable_id) == 2846:
            continue
        print(counter)
        counter += 1
        cursor = cnx.cursor(dictionary=True, buffered=True)
        try:
            cursor.callproc('GetVariableData',
                            [variable_id, org_id, survey_id])
        except:
            cursor_err = cnx.cursor()
            cnx.commit()
            cursor_err.close()
            cnx.close()
            exit(1)
        for i in cursor.stored_results():
            fetched = i.fetchall()
            print(fetched)
            fetched = [('0', ) if element[0] is None else element
                       for element in fetched]
            data_raw.append(fetched)
        cursor.close()

    exit(1)

    options = []
    for variable_id in variable_ids:
        print(counter)
        counter += 1
        cursor = cnx.cursor(dictionary=True, buffered=True)
        if True:
            try:
                cursor.callproc('GetVariableOptions', [variable_id])
            except:
                cursor_err = cnx.cursor()
                cnx.commit()
                cursor_err.close()
                cnx.close()
                exit(1)
            for i in cursor.stored_results():
                options.append(i.fetchall())
        cursor.close()

    # checkprint(options)

    cnx.close()
    # for variable in data_raw:
    # checkprint(variable)
    # checkprint(options)
    return data_raw, options