import torch


def preprocess(path='./json'):
    # prepareData, traditional2simplified, prepareVocab, and word2idx are
    # assumed to come from the surrounding project.
    poems = prepareData(path)
    poems = traditional2simplified(poems)
    vocab = prepareVocab(poems)
    fullData = word2idx(poems, vocab)
    fullData = torch.from_numpy(fullData)
    # 80/20 train/dev split
    trainSize = int(0.8 * len(fullData))
    testSize = len(fullData) - trainSize
    trainSet, devSet = torch.utils.data.random_split(fullData, [trainSize, testSize])
    return trainSet, devSet, vocab
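# A minimal usage sketch (not from the original source): the splits returned
# by preprocess() can be fed straight into DataLoaders. Batch size and
# shuffle settings below are illustrative assumptions.
from torch.utils.data import DataLoader

trainSet, devSet, vocab = preprocess('./json')
trainLoader = DataLoader(trainSet, batch_size=64, shuffle=True)  # shuffle for training
devLoader = DataLoader(devSet, batch_size=64, shuffle=False)     # keep dev order stable
for batch in trainLoader:
    # each batch stacks rows of the token-index tensor built by word2idx
    print(batch.shape)
    break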
import numpy as np
# prepareData, p2mat, and ep are assumed to come from the surrounding project.


def verboseOutput(params, scidict):
    """Perform calculations and then output driver file for CP execution."""
    # copied from verboseError.py
    scidict = prepareData(params, scidict)

    # compute p-values
    pValMat = p2mat.computePVal(scidict['trainData'], scidict['testData'],
                                scidict['trainLabels'], scidict['testLabels'], params)
    print("printing: point | p-value by class | true label")
    for point in range(pValMat.shape[0]):
        print(point, pValMat[point, :], scidict['testTarget'][point])

    # verbose results
    errorTypes = ['emptyError', 'predError', 'multiError']
    errorMat = np.zeros((len(params['confList']), len(errorTypes)))
    errorSourceMat = [[[], [], []] for i in range(len(params['confList']))]
    errorArray = np.zeros((len(errorTypes), len(pValMat), len(params['confList'])))
    errorArraySource = [[[], [], []] for i in range(len(params['confList']))]

    ####################################################################
    # Write code here for computing errorArraySource
    ####################################################################

    # print source results
    print("printing: confidence level | errors by type | points causing error")
    for i in range(len(params['confList'])):
        errorSum = np.sum(errorArray[:, :, i], axis=1)
        print(1 - params['confList'][i], errorSum, errorArraySource[i][1])

    # which points at 95% confidence have a prediction error
    logical = errorArray[1, :, 5] == 1
    for point in range(len(pValMat)):
        if logical[point] == 1:
            print(point, pValMat[point, :], scidict['testLabels'][point])

    ep.errorPlot(errorArray, params['confList'])
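# An illustrative sketch (not the placeholder's intended solution) of how
# errorArray could be filled: under conformal prediction, the region at
# significance eps keeps every class whose p-value exceeds eps, and a point
# can then contribute an empty, prediction, or multiple error.
import numpy as np

def fillErrorArray(pValMat, testLabels, confList):
    errorArray = np.zeros((3, len(pValMat), len(confList)))
    for i, eps in enumerate(confList):
        for point in range(len(pValMat)):
            region = np.where(pValMat[point, :] > eps)[0]  # classes kept at this eps
            if len(region) == 0:
                errorArray[0, point, i] = 1                # emptyError: nothing predicted
            elif testLabels[point] not in region:
                errorArray[1, point, i] = 1                # predError: true label excluded
            if len(region) > 1:
                errorArray[2, point, i] = 1                # multiError: ambiguous region
    return errorArray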
import numpy as np
import matplotlib.pyplot as plt


def RS():
    # 0
    indexes, X, L = prepareData('files/warner.txt', None, 2)  # load the data
    N = len(X)  # number of data points
    AVG = []    # final averaged R/S value for each window length n
    for n in L:  # (1)
        R_S = []  # R/S values for windows of length n
        R = []    # largest range of cumulative deviations per window of length n
        S = []    # standard deviation per window of length n
        i = 0
        while i <= N - n:  # (2)
            segment = X[i:i+n]       # take the next window of length n
            m = np.average(segment)  # mean of the current window
            Y = []  # deviations from the window mean
            Z = []  # cumulative sums of the deviations
            for s in range(i, i+n):  # (3)
                Y.append(X[s] - m)
                Z.append(np.sum(Y))  # cumulative deviation from the mean
            R.append(max(Z) - min(Z))          # (6) range of the cumulative deviations
            S.append(satndardDeviation(n, Y))  # (4) standard deviation of this window
            # (5)
            i += n  # jump to the start of the next window of length n
        for r, s in zip(R, S):  # (7)
            if s != 0:
                R_S.append(r/s)  # (8) R/S ratio for each window of length n
        AVG.append(np.average(R_S))  # (9) mean of all R/S values collected for this n

    plt.scatter(np.log(L), np.log(AVG), s=10)
    plt.title('RS warner')
    plt.ylabel('log((R/S)(n))')
    plt.xlabel('log(n)')
    result = np.polyfit(np.log(L), np.log(AVG), 1)
    print('alfa = ', result[0])
    plt.text(4, 2.75, '\u03B1 = {}'.format(round(result[0], 2)))
    x1 = np.log(L[0])
    x2 = np.log(L[-1])
    plt.plot([x1, x2], [result[0] * x1 + result[1], result[0] * x2 + result[1]], 'red')
    plt.show()
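# The helper satndardDeviation (name kept as spelled in the snippet) is not
# shown; a plausible sketch, assuming it is the population standard deviation
# of the n mean-deviations Y, whose mean is zero by construction:
import numpy as np

def satndardDeviation(n, Y):
    return np.sqrt(np.sum(np.square(Y)) / n)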
import numpy as np
import matplotlib.pyplot as plt


def DMA():
    # 0
    indexes, X, L = prepareData('files/warner.txt', 50, 4)
    N = len(X)
    modifiedStandardDeviation = []
    for n in L:
        # 1 backward-looking moving average over windows of length n
        i = n - 1
        movingAverage = X.copy()
        while i < N:
            cumulative_sum = 0.0
            for k in range(0, n):
                cumulative_sum += X[i - k]
            movingAverage[i] = cumulative_sum / n
            i += 1
        # 2 deviation of the series from its moving average
        total = 0.0
        for i in range(n, N):
            total += (X[i - 1] - movingAverage[i - 1]) ** 2
        modifiedStandardDeviation.append(np.sqrt(total / (N - n)))

    # 3 double logarithmic plot
    plt.scatter(np.log(L), np.log(modifiedStandardDeviation), s=20)
    plt.title('DMA warner')
    plt.ylabel(r'log($\sigma_{DMA}(n)$)')
    plt.xlabel('log(n)')
    result = np.polyfit(np.log(L), np.log(modifiedStandardDeviation), 1)
    print('alfa = ', result[0])
    print(result)
    plt.text(1.85, -0.05, '\u03B1 = {}'.format(round(result[0], 2)))
    x1 = np.log(L[0])
    x2 = np.log(L[-1])
    plt.plot([x1, x2], [result[0] * x1 + result[1], result[0] * x2 + result[1]], 'red')
    plt.show()
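# Equivalence sketch: the backward-looking window mean computed by the
# while-loop above can be vectorized with a convolution. Assumes X is a 1-D
# numeric sequence; positions before index n-1 keep the raw values, as above.
import numpy as np

def backwardMovingAverage(X, n):
    X = np.asarray(X, dtype=float)
    out = X.copy()
    # element j of the 'valid' convolution is mean(X[j:j+n]), which lands at
    # output index n-1+j, i.e. the mean of the n points ending at that index
    out[n - 1:] = np.convolve(X, np.ones(n) / n, mode='valid')
    return out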
import os
# mkdir, toyExample, loadFaceImages, prepareData, photometricStereo,
# getSurface, displayOutput, and plotSurfaceNormals come from the project.

subjectName = 'yaleB02'  # debug, yaleB01, yaleB02, yaleB05, yaleB07
numImages = 128
writeOutput = True
data_dir = os.path.join('..', 'data')
out_dir = os.path.join('..', 'output', 'photometricStereo')
image_dir = os.path.join(data_dir, 'photometricStereo', subjectName)
integrationMethod = 'random'

mkdir(out_dir)

if subjectName == 'debug':
    imageSize = (64, 64)
    (ambientImage, imArray, lightDirs, trueAlbedo,
     trueSurfaceNormals, trueHeightMap) = toyExample(imageSize, numImages)
else:
    (ambientImage, imArray, lightDirs) = loadFaceImages(image_dir, subjectName, numImages)

imArray = prepareData(imArray, ambientImage)
(albedoImage, surfaceNormals) = photometricStereo(imArray, lightDirs)
heightMap = getSurface(surfaceNormals, integrationMethod)

displayOutput(albedoImage, heightMap)
plotSurfaceNormals(surfaceNormals)

if subjectName == 'debug':
    displayOutput(trueAlbedo, trueHeightMap)
    plotSurfaceNormals(trueSurfaceNormals)

if writeOutput:
    imageName = os.path.join(out_dir, '{}_albedo.jpg'.format(subjectName))
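# photometricStereo itself is not shown; a minimal sketch of the classic
# Lambertian least-squares step such a function typically performs, assuming
# imArray has shape (h, w, numImages) and lightDirs has shape (numImages, 3):
import numpy as np

def photometricStereoSketch(imArray, lightDirs):
    h, w, k = imArray.shape
    I = imArray.reshape(h * w, k).T                         # one intensity column per pixel
    g, _, _, _ = np.linalg.lstsq(lightDirs, I, rcond=None)  # solve lightDirs @ g = I
    g = g.T.reshape(h, w, 3)                                # albedo-scaled normals per pixel
    albedo = np.linalg.norm(g, axis=2)
    normals = g / np.maximum(albedo[..., None], 1e-8)       # normalize, avoiding divide-by-zero
    return albedo, normals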
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import time

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

from prepareData import *

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

SOS_token = 0
EOS_token = 1
MAX_LENGTH = 10

eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", "she s",
                "you are", "you re ", "we are", "we re ",
                "they are", "they re ")

input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
print(random.choice(pairs))

teacher_forcing_ratio = 0.5
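# Sketch of how a (source, target) pair is usually turned into tensors in
# this setup, assuming the Lang objects built by prepareData expose a
# word2index dict (as in the standard PyTorch seq2seq tutorial):
def tensorFromSentence(lang, sentence):
    indexes = [lang.word2index[word] for word in sentence.split(' ')]
    indexes.append(EOS_token)  # every sequence ends with the EOS marker
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

def tensorsFromPair(pair):
    return (tensorFromSentence(input_lang, pair[0]),
            tensorFromSentence(output_lang, pair[1]))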
import numpy as np
import matplotlib.pyplot as plt


def DFA():
    # 0
    indexes, D, L = prepareData('./files/warner.txt', None, 2)
    N = len(D)

    # 1 mean of all data points
    avg = np.average(D)

    # 2 turn the data into a random walk (cumulative sum of deviations)
    randomWalk = []
    cumulative_sum = 0.0
    for i in range(0, N):
        cumulative_sum += D[i] - avg
        randomWalk.append(cumulative_sum)

    # loop over segment lengths
    F_avg = []
    for segment_size in L:
        # plt.plot(indexes, randomWalk)
        # plt.title('warner')
        # 3
        Y = randomWalk.copy()
        X = indexes.copy()
        i = 0
        k = indexes[0]
        F = []
        while i <= N - segment_size:
            # 4 fit a straight line within the segment: line[0]=a; line[1]=b
            line = np.polyfit(X[0:segment_size], Y[0:segment_size], 1)
            del Y[0:segment_size]
            # plt.plot([X[0], X[segment_size-1]],
            #          [line[0] * X[0] + line[1], line[0] * X[segment_size - 1] + line[1]], 'r')
            del X[0:segment_size]
            # 5 fluctuation of this segment around the fitted line
            F.append(calculateF(line, segment_size, i, k, randomWalk))
            k = k + segment_size
            i = i + segment_size
        # plt.show()
        # 6 mean fluctuation for this segment length
        F_avg.append(np.average(F))

    # 7 double logarithmic plot
    plt.scatter(np.log(L), np.log(F_avg), s=20)
    plt.title('DFA warner')
    plt.ylabel(r'$log(\tilde{F}(n))$')
    plt.xlabel('log(n)')
    result = np.polyfit(np.log(L), np.log(F_avg), 1)
    print('alfa = ', result[0])
    print(result)
    plt.text(4, 3.25, '\u03B1 = {}'.format(round(result[0], 2)))
    x1 = np.log(L[0])
    x2 = np.log(L[-1])
    plt.plot([x1, x2], [result[0] * x1 + result[1], result[0] * x2 + result[1]], 'red')
    plt.show()
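# calculateF is not shown in the snippet; a plausible sketch, assuming it is
# the root-mean-square deviation of the random walk from the fitted line over
# one segment (i = start offset into randomWalk, k = first x-value of the
# segment, and the x-values advance by one, matching the call site above):
import numpy as np

def calculateF(line, segment_size, i, k, randomWalk):
    a, b = line
    total = 0.0
    for j in range(segment_size):
        fit = a * (k + j) + b                    # fitted trend at this x-value
        total += (randomWalk[i + j] - fit) ** 2  # squared detrended deviation
    return np.sqrt(total / segment_size)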
    target[i] = preData[i][1]

scidict = {'data': data, 'target': target}
params = {
    'cpType': 1,
    'numClasses': 2,
    'confList': [.5, .4, .3, .2, .1, .05, .01],
    'trainPortion': .75,
    'calibPortion': .25,
    'classifier': 3,
    'flavor': None,
    'k': None,
}

scidict = prepareData(scidict, params)

if params['cpType'] != 2:  # not inductive
    pValMat = p2mat.computePVal(scidict['data'], scidict['testData'],
                                scidict['target'], scidict['testTarget'], params)
else:  # inductive
    pValMat = p2mat.computePVal(scidict['data'], scidict['testData'],
                                scidict['target'], scidict['testTarget'], params,
                                scidict['calibIndices'])

# manual check of results: p-values for each point next to its true label
for i in range(len(pValMat)):
    print(pValMat[i, :], scidict['testTarget'][i])
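# Usage sketch (illustrative): turning one row of pValMat into a conformal
# prediction set at 95% confidence, i.e. the classes whose p-value exceeds
# eps = 0.05.
import numpy as np

eps = 0.05
predictionSet = np.where(pValMat[0, :] > eps)[0]
print('classes kept at 95% confidence:', predictionSet)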
from prepareData import *

if __name__ == '__main__':
    prepareData()