Exemplo n.º 1
0
import sys, numpy as np
from utils.read_csv import read_csv_by_column
from scipy.stats.stats import spearmanr

# Spearman rank correlation between the first N columns of a CSV file.
#
# Command line: <num_variables> <csv path>. Prints the correlation
# coefficient(s) followed by the matching p-value(s).
if len(sys.argv) != 3:
    sys.exit("usage: python spearman.py [num_variables] ***.csv: the number of variables should be the same as the number of columns in the csv file")

# Columns are looked up by the labels "column 1" .. "column N".
var = int(sys.argv[1])
columns = ["column " + str(i) for i in range(1, var + 1)]

# `values` is indexed by column label below; the element type of each column
# depends on read_csv_by_column (presumably numeric -- TODO confirm).
values = read_csv_by_column(sys.argv[2], columns)
ylength = len(values[columns[0]])

# One row per variable, one column per observation. Every row is overwritten
# below, so an uninitialised buffer is sufficient.
inp = np.empty((var, ylength))
for i, name in enumerate(columns):
    inp[i] = values[name]

# axis=1 tells spearmanr that each row (not each column) is a variable.
rho, pval = spearmanr(inp, axis=1)

# Single-argument call form prints identically on Python 2 and Python 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(rho)
print(pval)
Exemplo n.º 2
0
import sys
from utils.read_csv import read_csv_by_column
from scipy.stats import ttest_1samp, ttest_ind

# Command line: <paired|unpaired> <csv path>.
if len(sys.argv) != 3:
    sys.exit("usage: python t_test.py [paired/unpaired] ***.csv")

# Only two data columns are compared at a time for now.
columns = ["col1", "col2"]
# Read the CSV named on the command line. The usage string and the argument
# count check both require it, so it must not be silently replaced by a
# hard-coded path.
values = read_csv_by_column(sys.argv[2], columns)

if sys.argv[1] == "paired":
    x = values[columns[0]]
    y = values[columns[1]]
    # Per-unit differences. Relies on the columns supporting element-wise
    # subtraction (presumably numpy arrays -- TODO confirm against
    # read_csv_by_column).
    x = x - y
    # Paired t-test: two measurements on the same experimental unit
    # (e.g., before and after a treatment), computed as a one-sample t-test
    # of the differences against a mean of 0.
    t_statistic, p_value = ttest_1samp(x, 0.0)

    # p < 0.05 => alternative hypothesis:
    # the difference in mean is not equal to 0
    # Formatted into one string so the output is the same on Python 2 and 3
    # (the bare `print a, b` statement is a SyntaxError on Python 3).
    print("paired t-test %s" % (p_value,))