def analyzeClusters(n_loops=1, cl=None, sp=None, shuffled=False, spShuff=False):
    """Average hierarchical-clustering statistics over ``n_loops`` data draws.

    Every iteration fetches data with ``win.getData``, optionally shuffles it
    in place with ``win.shuffleIt(data, mode=2)``, builds an average-linkage
    tree using the correlation metric and adds four series to running totals.
    The totals are divided by the number of iterations before being returned.

    Args:
        n_loops: Number of draws to average over; must be at least 1.
        cl: Forwarded to ``win.getData`` as ``class_``.
        sp: Forwarded to ``win.getData`` as ``spec``.
        shuffled: Forwarded to ``win.getData`` as ``shuffle``.
        spShuff: When truthy, each draw is additionally passed through
            ``win.shuffleIt(data, mode=2)`` before clustering.

    Returns:
        dict with the keys
            ``'bins'`` - centres of the histogram bins,
            ``'hist'`` - averaged histogram of the ``ys`` values,
            ``'ys'``   - averaged second ``dcoord`` entry of each dendrogram link,
            ``'z'``    - averaged third column of the linkage matrix, reversed,
            ``'acc'``  - averaged second-order difference of that column.

    Raises:
        ValueError: If ``n_loops`` is smaller than 1.
    """
    n = n_loops
    if n < 1:
        raise ValueError("n_loops must be at least 1, got %r" % (n,))

    bin_edges = list(drange(0.0, 1.0, 0.1))
    total_hist = total_ys = total_z = total_acc = None

    for _ in range(n):
        # Fetch a fresh draw on every pass and apply the optional mode-2
        # shuffle to each of them; previously only the very first draw was
        # shuffled, so the averaged result mixed shuffled and unshuffled data.
        data = win.getData(shuffle=shuffled, class_=cl, spec=sp)
        if spShuff:
            win.shuffleIt(data, mode=2)

        Z = hie.linkage(data, method='average', metric='correlation')
        D = hie.dendrogram(Z, orientation='left', no_plot=True)

        # freq method: histogram of the second dcoord entry of every link
        ys = [d[1] for d in D['dcoord']]
        total_ys = _sum_into(total_ys, ys)
        hist, bin_edges = np.histogram(ys, bins=bin_edges)
        total_hist = _sum_into(total_hist, hist)

        # elbow method (sort of): linkage distances in reversed row order
        z = Z[::-1, 2]
        total_z = _sum_into(total_z, z)

        # inverse elbow: 2nd derivative (second-order difference) of distances
        total_acc = _sum_into(total_acc, np.diff(z, 2))

    return {
        'bins': (bin_edges[:-1] + bin_edges[1:]) / 2,
        'hist': [a / n for a in total_hist],
        'ys': [a / n for a in total_ys],
        'z': [a / n for a in total_z],
        'acc': [a / n for a in total_acc],
    }


def _sum_into(totals, values):
    """Return the element-wise sum of ``values`` and the running ``totals``.

    ``totals`` is ``None`` before the first iteration, in which case the
    values themselves become the initial totals.
    """
    if totals is None:
        return list(values)
    return [a + b for a, b in zip(values, totals)]
import window_s_p_ft as win
import numpy as np
import scipy.stats as scp

# Correlation matrix of the unshuffled data; rowvar=0 makes the columns of
# the data matrix the variables that are correlated with each other.
data = win.getData(class_=None)
co_corr = np.corrcoef(data, rowvar=0)
# Mean over all matrix entries and the standard error of the mean of the
# flattened matrix (axis=None), printed as "mean +/- sem".
print(np.mean(co_corr), scp.sem(co_corr, axis=None), sep=' +/- ')
print('var: ', np.var(co_corr, axis=None))

n = 10000  # number of shuffled realisations averaged below

'''c shuff'''
# Running sums of mean, standard error and variance over the realisations.
m = 0
e = 0
v = 0
for ii in range(n):
    # Fresh data every pass, shuffled in place with mode 2.
    # NOTE(review): the heading suggests mode 2 is the "c" shuffle - confirm
    # against win.shuffleIt.
    data = win.getData(class_=None)
    win.shuffleIt(data, 2)
    co_corr = np.corrcoef(data, rowvar=0)
    m += np.mean(co_corr)
    e += scp.sem(co_corr, axis=None)
    v += np.var(co_corr, axis=None)
# Turn the sums into averages over the n realisations.
m /= n
e /= n
v /= n
print(m, e, sep=' +/- ')
print('var: ', v)

'''s shuff'''
m = 0
'Fizyka kwantowa': 'Quantum Physics',
    'Fizyka statystyczna i termodynamika': 'Statistical Physics and Thermodynamics',
    'Opracowanie danych doświadczalnych': 'Analysis of Experimental Data',
    'Analiza matematyczna 2': 'Mathematical Analysis 2',
    'Probabilistyka': 'Probability',
    'Algebra z geometrią': 'Algebra and Geometry',
    'Wstęp do fizyki jądrowej': 'Introduction to Nuclear Physics',
    'Analiza matematyczna 3': 'Mathematical Analysis 3',
    'Laboratorium fizyki 2': 'Physics Laboratory 2',
    'Analiza matematyczna 1': 'Mathematical Analysis 1',
    'Mechanika': 'Mechanics'
}

cl = 'L'
# Column-wise correlation matrix of the data fetched for class `cl`.
co_corr = np.corrcoef(win.getData(class_=cl), rowvar=0)
# Course names translated through the pl_en mapping.
labels = [pl_en[x] for x in win.getCoursesNames()]

# MDS is given a ready-made dissimilarity matrix instead of raw features.
mds = MDS(n_components=2, dissimilarity='precomputed')
dists = np.empty((len(co_corr), len(co_corr)))
for ii in range(len(labels)):
    for jj in range(len(labels)):
        # Correlation c is converted to the dissimilarity sqrt(2 * (1 - c)).
        dists[ii][jj] = math.sqrt(2 * (1 - co_corr[ii][jj]))
pos = mds.fit(dists).embedding_

# One graph node per label, identified by its index in `labels`.
G = nx.Graph()
G.add_nodes_from(range(len(labels)))
# Legend text: one "<index> - <label>" line per node.
textstr = ""
for ii, l in enumerate(labels):
    textstr += str(ii) + " - " + l + "\n"
    # Visit every pair (ii, jj) with jj > ii exactly once.
    for jj in range(ii + 1, len(labels)):
# sp = students[0].spec
# anno = []
# anno2 = []
# for ii, st in enumerate(students):
#     if st.class_ != cl:
#         anno.append((ii - 1, students[ii - 1].class_))
#         cl = st.class_
#     if st.spec != sp:
#         anno2.append((ii - 1, students[ii - 1].spec))
#         sp = st.spec
# anno.append(((len(students) - 1), students[-1].class_))
# anno2.append(((len(students) - 1), students[-1].spec))
# anno = anno + anno2

"""set up ranges"""
# Determine one common value range [vmin, vmax] that covers the correlation
# matrices of the original data and of both shuffled variants.
students = win.getData()
co_corr = np.corrcoef(students, rowvar=0)
vmin, vmax = co_corr.min(), co_corr.max()

# Variant shuffled in place with mode 2.
win.shuffleIt(students, 2)
co_corr = np.corrcoef(students, rowvar=0)
vmin = min(vmin, co_corr.min())
# The upper bound has to follow the matrix maximum; it used to be compared
# against co_corr.min(), so a larger maximum was never picked up.
vmax = max(vmax, co_corr.max())

# Variant shuffled in place with mode 1, starting from fresh data.
students = win.getData()
win.shuffleIt(students, 1)
co_corr = np.corrcoef(students, rowvar=0)
vmin = min(vmin, co_corr.min())
vmax = max(vmax, co_corr.max())

# Widen the range by a small margin on both sides.
vmin, vmax = vmin - 0.0001, vmax + 0.0001

"""plot heatmap"""
font = {"family": "normal", "weight": "bold", "size": 22}
'''spec: OE, MN, FK, FM'''
# students = win.getStudents(shuffle=False)
# cl = students[0].class_
# sp = students[0].spec
# anno = []
# anno2 = []
# for ii, st in enumerate(students):
#     if st.class_ != cl:
#         anno.append((ii - 1, students[ii - 1].class_))
#         cl = st.class_
#     if st.spec != sp:
#         anno2.append((ii - 1, students[ii - 1].spec))
#         sp = st.spec
# anno.append(((len(students) - 1), students[-1].class_))
# anno2.append(((len(students) - 1), students[-1].spec))
# anno = anno + anno2

# Unshuffled data restricted to spec 'FK'.
students = win.getData(shuffle=False, spec='FK')
# win.shuffleIt(students, 2)
# win.shuffleIt(students, 1)
# st_corr = pairwise_distances(students, students, 'jaccard')

# Pairwise Jaccard distances between the columns of the data matrix
# (the transpose turns columns into the rows that are compared).
columns = students.T
co_corr = pairwise_distances(columns, columns, 'jaccard')

# Heat map with the course names on both axes.
heatmap.plotheat(
    co_corr,
    xlabels=win.getCoursesNames(),
    ylabels=win.getCoursesNames(),
    mode='special',
)
# heatmap.plotheat(st_corr, changeTicks=False, annotation=anno, mode='special')
import networkx as nx
import matplotlib.pyplot as plt
import window_s_p_ft as win
import numpy as np
import math
from sklearn.manifold import MDS

cl = 'L'

# Data rows and student records of class `cl`, each sorted by descending mean.
data = sorted(win.getData(class_=cl), key=np.mean, reverse=True)
studs = sorted(
    win.getStudents(class_=cl),
    key=lambda stud: np.mean(stud.grades),
    reverse=True,
)

# Row-wise correlation matrix (rowvar=1: every row is one variable).
st_corr = np.corrcoef(data, rowvar=1)
n_rows = len(data)

# Embed the rows in 2-D from the dissimilarity sqrt(2 * (1 - correlation)).
mds = MDS(n_components=2, dissimilarity='precomputed')
dists = np.empty((len(st_corr), len(st_corr)))
for ii in range(n_rows):
    for jj in range(n_rows):
        dists[ii, jj] = math.sqrt(2 * (1 - st_corr[ii, jj]))
pos = mds.fit(dists).embedding_

# Complete weighted graph: one node per row, one edge per unordered pair.
G = nx.Graph()
G.add_nodes_from(range(n_rows))

# Node labels: 1-based position followed by the spec of the matching record.
labels = [str(idx + 1) + " " + str(studs[idx].spec) for idx in range(n_rows)]

for ii in range(n_rows):
    for jj in range(ii + 1, n_rows):
        d = math.sqrt(2 * (1 - st_corr[ii, jj]))
        G.add_edge(ii, jj, weight=d)
'Podstawy elektroniki': 'Fundamentals of Electronics', 'Grafika inżynierska': 'Engineering Graphics', 'Metody matematyczne fizyki': 'Mathematical Methods of Physics', 'Elektronika w eksperymencie fizycznym': 'Electronics in Physical Experiment', 'Podstawy projektowania przyrządów wirtualnych': 'Fundamentals of Virtual Devices Design', 'Programowanie obiektowe': 'Object-Oriented Programming', 'Podstawy optyki': 'Fundamentals of Optics', 'Fizyka kwantowa': 'Quantum Physics', 'Fizyka statystyczna i termodynamika': 'Statistical Physics and Thermodynamics', 'Opracowanie danych doświadczalnych': 'Analysis of Experimental Data', 'Analiza matematyczna 2': 'Mathematical Analysis 2', 'Probabilistyka': 'Probability', 'Algebra z geometrią': 'Algebra and Geometry', 'Wstęp do fizyki jądrowej': 'Introduction to Nuclear Physics', 'Analiza matematyczna 3': 'Mathematical Analysis 3', 'Laboratorium fizyki 2': 'Physics Laboratory 2', 'Analiza matematyczna 1': 'Mathematical Analysis 1', 'Mechanika': 'Mechanics' } courses = win.getData().T courses_names = win.getCoursesNames() courses_names = [pl_en[x] for x in courses_names] co_corr = np.corrcoef(win.getData(), rowvar=0) for ii, c in enumerate(courses): av = round(np.mean(c), 2) var = round(np.var(c), 2) cij = round(sum(co_corr[ii]) - 1.0, 2) print(courses_names[ii], av, var, cij, sep=' & ')
import window_s_p_ft as win
import numpy as np
import scipy.stats as scp
import math
import matplotlib.pyplot as plt
import matplotlib

classes = ['J', 'K', 'L']
# One row per semester (1..5), one column per data row across all classes.
X = np.zeros((5, 217))

for sem in range(1, 6):
    sem_row = X[sem - 1]
    counter = 0  # next free column; restarts for every semester
    for cl in classes:
        studs = win.getData(class_=cl, sems='=' + str(sem))
        # Mean and standard error of the mean over all values of this
        # class and semester.
        glav = np.mean(studs)
        glsig = scp.sem(studs, axis=None)
        for s in studs:
            # Row mean expressed as a deviation from the class mean in
            # units of that standard error.
            sem_row[counter] = (np.mean(s) - glav) / glsig
            counter += 1

# Scatter of consecutive semesters: row n on the x axis, row n+1 on the y axis.
plt.xlabel('xn')
plt.ylabel('xn+1')
for ii in range(4):
    pair_label = 'n={},n+1={}'.format(ii + 1, ii + 2)
    plt.plot(X[ii, :], X[ii + 1, :], 'o', label=pair_label)
plt.legend(loc=2)
plt.show()
# bins = (bins[:-1] + bins[1:]) / 2 # plt.bar(bins_, hist, align='center', width=width, label='real') # plt.show() classes = ['J', 'K', 'L'] specs = ['FK', 'OE', 'FM', 'MN'] font = {'family': 'normal', 'weight': 'bold', 'size': 12} matplotlib.rc('font', **font) for sp in specs: for cl in classes: data = win.getData(spec=sp, class_=cl) hist, bins = np.histogram( data, bins=[2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5], range=(1.0, 6.0)) bins_ = bins bins = (bins[:-1] + bins[1:]) / 2 # print(bins) # print(hist) plt.plot(bins, hist) # plt.show() # figure = plt.gcf() # get current figure # figure.set_size_inches(12, 12) # plt.savefig('/home/luke/Documents/APPA/img/hists/' + # "grades_" + cl + "_" + sp + '.png', bbox_inches='tight', # dpi=300) # plt.clf() plt.show()
import window_s_p_ft as win
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, KernelPCA


def removeNZ(arr, row=1):
    """Drop the entries of ``arr`` that contain a ``None`` or a ``0``.

    A boolean mask is built by checking, along axis ``row``, whether any
    element equals ``None`` or ``0``; the entries of ``arr`` selected by that
    mask are removed and the rest is returned.
    """
    mask = np.any(np.equal(arr, None) | np.equal(arr, 0), axis=row)
    return arr[~mask]


meanop = False  # switch between the two branches below
cl = None
data = win.getData(class_=cl, spare=True)
x = [1, 2, 3, 4, 5]  # semester numbers
if meanop:
    # y[jj, ii - 1] holds the mean of data row jj in semester ii.
    y = np.zeros((len(data), 5))
    for ii in x:
        data = win.getData(sems='=' + str(ii), class_=cl, spare=True)
        for jj, s in enumerate(data):
            try:
                if np.mean(s) >= 3.0:
                    y[jj, ii - 1] = np.mean(s)
                else:
                    # A bare `raise` with no active exception raises
                    # RuntimeError, which the handler below catches; it is
                    # used to route means below 3.0 to the zero assignment.
                    raise
            except:
                # Reached both for means below 3.0 and for any error raised
                # while computing the mean; the cell is zeroed either way.
                y[jj, ii - 1] = 0.
    # Remove every row that still contains a zero (or None) cell.
    y = removeNZ(y)
else:
# # accumulate([1,2,3,4,5]) --> 1 3 6 10 15 # # accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120 # it = iter(iterable) # try: # total = next(it) # except StopIteration: # return # yield total # for element in it: # total = func(total, element) # yield total '''PCA''' pca = PCA(whiten=False) data = win.getData() # win.shuffleIt(data, 2) # win.shuffleIt(data, 1) pca.fit(data) '''plot''' # expl_var_cumultative = [x / max(list(accumulate(pca.explained_variance_))) for x in list(accumulate(pca.explained_variance_))] # plt.plot(range(len(pca.explained_variance_)), expl_var_cumultative, # 'bo-', label="real") ndims = range(1, len(pca.explained_variance_ratio_)+1) expl_var_cumultative = list(accumulate(pca.explained_variance_ratio_)) plt.plot(ndims, expl_var_cumultative, 'r*', label='real - after PCA', markersize=10) # var = [] for x in range(27): var.append(pca.get_covariance()[x][x]) var_r = var / sum(var)
'Laboratorium fizyki 2': ('Physics Laboratory 2', 'PL2'), 'Analiza matematyczna 1': ('Mathematical Analysis 1', 'MA1'), 'Mechanika': ('Mechanics', 'M') } font = {'family': 'normal', 'weight': 'bold', 'size': 22} matplotlib.rc('font', **font) matplotlib.rcParams['ps.useafm'] = True matplotlib.rcParams['pdf.use14corefonts'] = True matplotlib.rcParams['text.usetex'] = True fig, ax = plt.subplots(1) data = win.getData() lr = LR() '''average of a student in function of their first principal component''' # pca_0 = PCA(n_components=1) # data_0 = pca_0.fit_transform(data) # s_avs = [] # for s in data: # s_avs.append(np.mean(s)) # lr.fit(data_0, s_avs) # pr = lr.predict(data_0) # plt.plot(data_0, s_avs, 'o', label='students') # plt.plot(data_0, pr, '-', label='fit', linewidth=3.0) # plt.legend() # plt.xlabel('value of first principal component for a student') # plt.ylabel('mean value of student\'s grades')