def analyzeClusters(n_loops=1, cl=None, sp=None, shuffled=False, spShuff=False):
    """Average hierarchical-clustering statistics over repeated data draws.

    Every pass fetches a data set with ``win.getData``, builds an
    average-linkage tree on correlation distance and collects three
    summaries of it; the summaries are averaged over all passes.

    Args:
        n_loops: Number of data draws to average over; must be >= 1.
        cl: Forwarded to ``win.getData`` as ``class_``.
        sp: Forwarded to ``win.getData`` as ``spec``.
        shuffled: Forwarded to ``win.getData`` as ``shuffle``.
        spShuff: When ``True``, ``win.shuffleIt(data, mode=2)`` is applied
            to every draw before clustering.

    Returns:
        dict with the keys
        ``'bins'`` - centres of the histogram bins,
        ``'hist'`` - mean histogram of the dendrogram ``dcoord`` heights,
        ``'ys'``   - mean of ``d[1]`` for every ``dcoord`` entry,
        ``'z'``    - mean linkage distances in reversed row order,
        ``'acc'``  - mean second difference of those distances.

    Raises:
        ValueError: If ``n_loops`` is smaller than 1.
    """
    if n_loops < 1:
        # Previously this surfaced later as ZeroDivisionError / TypeError.
        raise ValueError("n_loops must be at least 1, got %r" % (n_loops,))

    bins = [i for i in drange(0.0, 1.0, 0.1)]

    # Running sums; they become arrays on the first pass, so no throw-away
    # clustering is needed just to learn the result lengths.
    # NOTE: assumes every draw has the same number of rows.
    total_hist = total_ys = total_z = total_acc = 0

    for _ in range(n_loops):
        data = win.getData(shuffle=shuffled, class_=cl, spec=sp)
        if spShuff is True:
            # Applied to every draw, not only the first one, so that all
            # averaged passes are treated alike. Presumably shuffleIt works
            # in place (its return value is not used) - confirm.
            win.shuffleIt(data, mode=2)

        Z = hie.linkage(data, method='average', metric='correlation')
        D = hie.dendrogram(Z, orientation='left', no_plot=True)

        # freq method
        ys = np.array([d[1] for d in D['dcoord']])
        hist, bins = np.histogram(ys, bins=bins)
        total_ys = total_ys + ys
        total_hist = total_hist + hist

        # elbow method (sort of)
        z = Z[::-1, 2]
        total_z = total_z + z

        # inv elbow: 2nd derivative of distances
        total_acc = total_acc + np.diff(z, 2)

    return {
        'bins': (bins[:-1] + bins[1:]) / 2,
        'hist': list(total_hist / n_loops),
        'ys': list(total_ys / n_loops),
        'z': list(total_z / n_loops),
        'acc': list(total_acc / n_loops),
    }
import window_s_p_ft as win
import numpy as np
import scipy.stats as scp


# Baseline: correlation matrix of the unshuffled data (rowvar=0 treats
# every column as a variable), reported as mean +/- standard error and
# variance over all matrix entries.
data = win.getData(class_=None)
co_corr = np.corrcoef(data, rowvar=0)
print(np.mean(co_corr), scp.sem(co_corr, axis=None), sep=' +/- ')
print('var: ', np.var(co_corr, axis=None))

n = 10000

'''c shuff'''
# Accumulate the same three statistics over n draws, each passed through
# win.shuffleIt(data, 2) first, then divide by n to get their averages.
m, e, v = 0, 0, 0
for ii in range(n):
    data = win.getData(class_=None)
    win.shuffleIt(data, 2)
    co_corr = np.corrcoef(data, rowvar=0)
    m = m + np.mean(co_corr)
    e = e + scp.sem(co_corr, axis=None)
    v = v + np.var(co_corr, axis=None)
m, e, v = m / n, e / n, v / n
print(m, e, sep=' +/- ')
print('var: ', v)

'''s shuff'''
m = 0
    'Fizyka kwantowa': 'Quantum Physics',
    'Fizyka statystyczna i termodynamika': 'Statistical Physics and Thermodynamics',
    'Opracowanie danych doświadczalnych': 'Analysis of Experimental Data',
    'Analiza matematyczna 2': 'Mathematical Analysis 2',
    'Probabilistyka': 'Probability',
    'Algebra z geometrią': 'Algebra and Geometry',
    'Wstęp do fizyki jądrowej': 'Introduction to Nuclear Physics',
    'Analiza matematyczna 3': 'Mathematical Analysis 3',
    'Laboratorium fizyki 2': 'Physics Laboratory 2',
    'Analiza matematyczna 1': 'Mathematical Analysis 1',
    'Mechanika': 'Mechanics'
}

cl = 'L'

# Correlation between the columns of the class data (rowvar=0 treats every
# column as a variable) and the translated course names used as labels.
co_corr = np.corrcoef(win.getData(class_=cl), rowvar=0)
labels = [pl_en[x] for x in win.getCoursesNames()]

# Convert correlations to distances d = sqrt(2 * (1 - c)) for the whole
# matrix in one step. This fills every entry, whereas the element-wise loop
# over len(labels) could leave parts of an np.empty buffer uninitialised
# when the label count and the matrix size differ.
mds = MDS(n_components=2, dissimilarity='precomputed')
dists = np.sqrt(2 * (1 - co_corr))
pos = mds.fit(dists).embedding_

G = nx.Graph()
G.add_nodes_from(range(len(labels)))
textstr = ""
for ii, l in enumerate(labels):
    textstr += str(ii) + " - " + l + "\n"
    for jj in range(ii + 1, len(labels)):
# sp = students[0].spec
# anno = []
# anno2 = []
# for ii, st in enumerate(students):
#     if st.class_ != cl:
#         anno.append((ii - 1, students[ii - 1].class_))
#         cl = st.class_
#     if st.spec != sp:
#         anno2.append((ii - 1, students[ii - 1].spec))
#         sp = st.spec
# anno.append(((len(students) - 1), students[-1].class_))
# anno2.append(((len(students) - 1), students[-1].spec))
# anno = anno + anno2

"""set up ranges"""
# Find one common [vmin, vmax] value range covering the correlation matrix
# of the original data and of the data after each of the two shuffle modes.
students = win.getData()
co_corr = np.corrcoef(students, rowvar=0)
vmin, vmax = co_corr.min(), co_corr.max()

win.shuffleIt(students, 2)
co_corr = np.corrcoef(students, rowvar=0)
vmin = vmin if co_corr.min() >= vmin else co_corr.min()
# Compare the matrix *maximum* with vmax; comparing the minimum (as before)
# is practically always true, so vmax was never widened.
vmax = vmax if co_corr.max() <= vmax else co_corr.max()

students = win.getData()
win.shuffleIt(students, 1)
co_corr = np.corrcoef(students, rowvar=0)
vmin = vmin if co_corr.min() >= vmin else co_corr.min()
vmax = vmax if co_corr.max() <= vmax else co_corr.max()

# Small margin so the extreme values lie strictly inside the range.
vmin, vmax = vmin - 0.0001, vmax + 0.0001

"""plot heatmap"""
# Font settings dict; not passed to anything in the visible part of the script.
font = {"family": "normal", "weight": "bold", "size": 22}

'''spec: OE, MN, FK, FM'''
# students = win.getStudents(shuffle=False)

# cl = students[0].class_
# sp = students[0].spec
# anno = []
# anno2 = []
# for ii, st in enumerate(students):
#     if st.class_ != cl:
#         anno.append((ii - 1, students[ii - 1].class_))
#         cl = st.class_
#     if st.spec != sp:
#         anno2.append((ii - 1, students[ii - 1].spec))
#         sp = st.spec
# anno.append(((len(students) - 1), students[-1].class_))
# anno2.append(((len(students) - 1), students[-1].spec))
# anno = anno + anno2

# Unshuffled data restricted to spec 'FK'.
students = win.getData(shuffle=False, spec='FK')

# win.shuffleIt(students, 2)
# win.shuffleIt(students, 1)

# st_corr = pairwise_distances(students, students, 'jaccard')
# Pairwise 'jaccard' distances between the rows of students.T, i.e. between
# the columns of the data (presumably sklearn's pairwise_distances - confirm
# against the imports, which are not visible here).
co_corr = pairwise_distances(students.T, students.T, 'jaccard')

# Draw the distance matrix with the course names on both axes.
heatmap.plotheat(co_corr, xlabels=win.getCoursesNames(), ylabels=win.getCoursesNames(), mode='special')
# heatmap.plotheat(st_corr, changeTicks=False, annotation=anno, mode='special')
import networkx as nx
import matplotlib.pyplot as plt
import window_s_p_ft as win
import numpy as np
import math
from sklearn.manifold import MDS

cl = 'L'

# Grade rows and student records, both ordered by descending mean grade.
# NOTE(review): the two sequences are sorted independently; the labels built
# below assume they end up in the same order - confirm.
data = win.getData(class_=cl)
data = sorted(data, key=lambda s: np.mean(s), reverse=True)
studs = win.getStudents(class_=cl)
studs = sorted(studs, key=lambda s: np.mean(s.grades), reverse=True)
# rowvar=1: every row (one entry of data) is a variable.
st_corr = np.corrcoef(data, rowvar=1)

# Correlation -> distance, d = sqrt(2 * (1 - c)), computed for the whole
# matrix at once instead of element by element.
mds = MDS(n_components=2, dissimilarity='precomputed')
dists = np.sqrt(2 * (1 - st_corr))
pos = mds.fit(dists).embedding_

# Complete graph over the rows of data; edge weights reuse the distances
# computed above rather than recomputing them per pair.
G = nx.Graph()
G.add_nodes_from(range(len(data)))
labels = []
for ii in range(len(data)):
    labels.append(str(ii + 1) + " " +
                  str(studs[ii].spec))
    for jj in range(ii + 1, len(data)):
        d = float(dists[ii][jj])
        G.add_edge(ii, jj, weight=d)
    'Podstawy elektroniki': 'Fundamentals of Electronics',
    'Grafika inżynierska': 'Engineering Graphics',
    'Metody matematyczne fizyki': 'Mathematical Methods of Physics',
    'Elektronika w eksperymencie fizycznym': 'Electronics in Physical Experiment',
    'Podstawy projektowania przyrządów wirtualnych': 'Fundamentals of Virtual Devices Design',
    'Programowanie obiektowe': 'Object-Oriented Programming',
    'Podstawy optyki': 'Fundamentals of Optics',
    'Fizyka kwantowa': 'Quantum Physics',
    'Fizyka statystyczna i termodynamika': 'Statistical Physics and Thermodynamics',
    'Opracowanie danych doświadczalnych': 'Analysis of Experimental Data',
    'Analiza matematyczna 2': 'Mathematical Analysis 2',
    'Probabilistyka': 'Probability',
    'Algebra z geometrią': 'Algebra and Geometry',
    'Wstęp do fizyki jądrowej': 'Introduction to Nuclear Physics',
    'Analiza matematyczna 3': 'Mathematical Analysis 3',
    'Laboratorium fizyki 2': 'Physics Laboratory 2',
    'Analiza matematyczna 1': 'Mathematical Analysis 1',
    'Mechanika': 'Mechanics'
}

# Transposed data: iterating `courses` yields one column of the data at a time.
courses = win.getData().T
courses_names = [pl_en[x] for x in win.getCoursesNames()]
co_corr = np.corrcoef(win.getData(), rowvar=0)

# One ' & '-separated line per course: name, mean, variance and the sum of
# its correlation row minus 1.0, each rounded to 2 decimals.
for ii, c in enumerate(courses):
    av, var = round(np.mean(c), 2), round(np.var(c), 2)
    cij = round(sum(co_corr[ii]) - 1.0, 2)
    print(courses_names[ii], av, var, cij, sep=' & ')
import window_s_p_ft as win
import numpy as np
import scipy.stats as scp
import math
import matplotlib.pyplot as plt
import matplotlib

# Classes are visited in this order when filling each row of X.
classes = ['J', 'K', 'L']
# One row per semester 1..5; the 217 columns are hard-coded - presumably the
# total number of rows returned over all classes, TODO confirm.
X = np.zeros((5, 217))
for sem in range(1, 6):
    # Next free column in the current semester row.
    counter = 0
    for cl in classes:
        studs = win.getData(class_=cl, sems='=' + str(sem))
        # Mean and standard error of the mean over all values in studs.
        glav = np.mean(studs)
        glsig = scp.sem(studs, axis=None)
        for s in studs:
            # Mean of one row, centred on glav and scaled by glsig.
            X[sem - 1][counter] = (np.mean(s) - glav) / glsig
            counter += 1

# Plot each semester row of X against the following one.
plt.xlabel('xn')
plt.ylabel('xn+1')
for ii in range(0, 4):
    plt.plot(X[ii, :], X[ii + 1, :], 'o', label='n='+str(ii+1)+',n+1='+str(ii+1+1))
    # legend() and show() are inside the loop, so they run once per pair.
    plt.legend(loc=2)
    plt.show()
# bins = (bins[:-1] + bins[1:]) / 2
# plt.bar(bins_, hist, align='center', width=width, label='real')
# plt.show()

classes = ['J', 'K', 'L']
specs = ['FK', 'OE', 'FM', 'MN']

# Global matplotlib font settings applied through matplotlib.rc below.
font = {'family': 'normal',
        'weight': 'bold',
        'size': 12}

matplotlib.rc('font', **font)

# One histogram line per (spec, class) pair, all drawn on the same axes and
# shown together by the single plt.show() after the loops.
for sp in specs:
    for cl in classes:
        data = win.getData(spec=sp, class_=cl)
        # NOTE(review): explicit bin edges are given, so `range` presumably
        # has no effect here - confirm against the numpy.histogram docs.
        hist, bins = np.histogram(
            data, bins=[2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5], range=(1.0, 6.0))
        # Keep the raw edges, then replace `bins` by the bin centres.
        bins_ = bins
        bins = (bins[:-1] + bins[1:]) / 2
        # print(bins)
        # print(hist)
        plt.plot(bins, hist)
        # plt.show()
        # figure = plt.gcf()  # get current figure
        # figure.set_size_inches(12, 12)
        # plt.savefig('/home/luke/Documents/APPA/img/hists/' +
        #             "grades_" + cl + "_" + sp + '.png', bbox_inches='tight',
        #             dpi=300)
        # plt.clf()
plt.show()
import window_s_p_ft as win
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, KernelPCA


def removeNZ(arr, row=1):
    """Drop the rows (or columns) of ``arr`` that contain ``None`` or ``0``.

    Args:
        arr: 2-D NumPy array.
        row: Axis scanned for ``None``/``0``. ``1`` (default) checks across
            each row and drops the offending rows; ``0`` checks down each
            column and drops the offending columns.

    Returns:
        A new array without the offending rows/columns.
    """
    bad = np.equal(arr, None) | np.equal(arr, 0)
    mask = np.any(bad, axis=row)
    if row == 0:
        # The mask has one flag per column here, so it must index the second
        # axis; indexing rows with it raised IndexError or removed the wrong
        # rows.
        return arr[:, ~mask]
    return arr[~mask]

meanop = False

cl = None
data = win.getData(class_=cl, spare=True)

x = [1, 2, 3, 4, 5]

if meanop:
    y = np.zeros((len(data), 5))
    for ii in x:
        data = win.getData(sems='=' + str(ii), class_=cl, spare=True)
        for jj, s in enumerate(data):
            try:
                if np.mean(s) >= 3.0:
                    y[jj, ii - 1] = np.mean(s)
                else:
                    raise
            except:
                y[jj, ii - 1] = 0.
    y = removeNZ(y)
else:
예제 #11
0
#     # accumulate([1,2,3,4,5]) --> 1 3 6 10 15
#     # accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120
#     it = iter(iterable)
#     try:
#         total = next(it)
#     except StopIteration:
#         return
#     yield total
#     for element in it:
#         total = func(total, element)
#         yield total


'''PCA'''
# Fit a PCA with default component count (no whitening) on the full data.
pca = PCA(whiten=False)
data = win.getData()
# win.shuffleIt(data, 2)
# win.shuffleIt(data, 1)
pca.fit(data)
'''plot'''
# expl_var_cumultative = [x / max(list(accumulate(pca.explained_variance_))) for x in list(accumulate(pca.explained_variance_))]
# plt.plot(range(len(pca.explained_variance_)), expl_var_cumultative,
# 'bo-', label="real")
# Cumulative explained-variance ratio against the number of components (1-based).
ndims = range(1, len(pca.explained_variance_ratio_)+1)
expl_var_cumultative = list(accumulate(pca.explained_variance_ratio_))
plt.plot(ndims, expl_var_cumultative, 'r*', label='real - after PCA', markersize=10)
#
# Diagonal of the model covariance for the first 27 features (count is
# hard-coded). The covariance matrix is fetched once instead of being
# rebuilt by get_covariance() on every loop iteration.
cov = pca.get_covariance()
var = []
for x in range(27):
    var.append(cov[x][x])
# Each entry's share of the summed diagonal; dividing the list by a NumPy
# scalar yields an array (assumes get_covariance() returns an ndarray).
var_r = var / sum(var)
    'Laboratorium fizyki 2': ('Physics Laboratory 2', 'PL2'),
    'Analiza matematyczna 1': ('Mathematical Analysis 1', 'MA1'),
    'Mechanika': ('Mechanics', 'M')
}

# Global matplotlib font settings applied through matplotlib.rc below.
font = {'family': 'normal',
        'weight': 'bold',
        'size': 22}

matplotlib.rc('font', **font)
# Output/font rcParams: AFM fonts for PS, the 14 core fonts for PDF, and
# text rendering through TeX.
matplotlib.rcParams['ps.useafm'] = True
matplotlib.rcParams['pdf.use14corefonts'] = True
matplotlib.rcParams['text.usetex'] = True
# Single figure/axes pair for the plots that follow.
fig, ax = plt.subplots(1)

data = win.getData()
# LR is defined outside the visible part of the file - presumably a linear
# regression estimator (it is used with fit/predict in the commented-out
# code below); confirm against the imports.
lr = LR()

'''average of a student in function of their first principal component'''
# pca_0 = PCA(n_components=1)
# data_0 = pca_0.fit_transform(data)
# s_avs = []
# for s in data:
#     s_avs.append(np.mean(s))
# lr.fit(data_0, s_avs)
# pr = lr.predict(data_0)
# plt.plot(data_0, s_avs, 'o', label='students')
# plt.plot(data_0, pr, '-', label='fit', linewidth=3.0)
# plt.legend()
# plt.xlabel('value of first principal component for a student')
# plt.ylabel('mean value of student\'s grades')