Пример #1
0
def preprocess(path='./json', trainRatio=0.8):
    """Load the poem corpus, index it and split it into train and dev subsets.

    The pipeline is ``prepareData`` -> ``traditional2simplified`` ->
    ``prepareVocab`` -> ``word2idx`` -> ``torch.from_numpy`` ->
    ``torch.utils.data.random_split``.

    Args:
        path: Location handed to ``prepareData``.
        trainRatio: Fraction of the samples placed in the training subset;
            the remainder forms the dev subset. The default, 0.8, is the
            value that used to be hard-coded.

    Returns:
        A ``(trainSet, devSet, vocab)`` tuple: the two subsets produced by
        ``random_split`` and the vocabulary returned by ``prepareVocab``.

    Raises:
        ValueError: If ``trainRatio`` lies outside ``[0, 1]``.
    """
    # Fail fast, before any data is loaded.
    if not 0.0 <= trainRatio <= 1.0:
        raise ValueError(
            'trainRatio must be within [0, 1], got {!r}'.format(trainRatio))

    poems = traditional2simplified(prepareData(path))
    vocab = prepareVocab(poems)
    # torch.from_numpy requires a NumPy array, so word2idx must return one.
    fullData = torch.from_numpy(word2idx(poems, vocab))

    sampleCount = len(fullData)
    trainSize = int(trainRatio * sampleCount)
    # Use the remainder so that both sizes always add up to sampleCount.
    devSize = sampleCount - trainSize
    trainSet, devSet = torch.utils.data.random_split(
        fullData, [trainSize, devSize])
    return trainSet, devSet, vocab
Пример #2
0
def verboseOutput(params, scidict):
	"""
		Driver for a CP run: compute p-values, print them, then report errors.

		Written for Python 2 (print statements, xrange).

		params  -- settings dict; this function reads params['confList'] and
		           forwards the whole dict to prepareData and p2mat.computePVal.
		scidict -- data container handed to prepareData; the value returned by
		           prepareData is indexed with the keys 'trainData', 'testData',
		           'trainLabels', 'testLabels' and 'testTarget'.

		Returns nothing; results are printed and passed to ep.errorPlot.

		NOTE(review): the section meant to fill errorArray and
		errorArraySource is still a placeholder, so as written every error
		count stays zero and no point is reported as a prediction error.
	"""

	#copied from verboseError.py
	scidict = prepareData(params, scidict)

	#compute p values
	pValMat = p2mat.computePVal(scidict['trainData'],scidict['testData'],scidict['trainLabels'],scidict['testLabels'], params)

	# one output row per test point: index, its row of pValMat, its label
	# NOTE(review): this loop reads scidict['testTarget'] while the loop near
	# the end reads scidict['testLabels'] -- confirm which key is intended.
	print "printing: point | p-value by class | true label"
	size = pValMat.shape
	for point in xrange(size[0]):
		print point,pValMat[point,:],scidict['testTarget'][point]


	# verbose results
	# errorMat and errorSourceMat are allocated but not used anywhere below;
	# presumably they are meant for the placeholder section -- TODO confirm.
	errorMat = np.zeros( (len(params['confList']),len(['emptyError','predError','multiError'])) )
	errorSourceMat = [ [ [],[],[] ] for i in xrange(len(params['confList'])) ]

	# errorArray is indexed [error type, test point, confidence level];
	# errorArraySource holds three lists per confidence level.
	errorArray = np.zeros( (len(['emptyError','predError','multiError']),len(pValMat),len(params['confList'])) )
	errorArraySource = [ [ [],[],[] ] for i in xrange(len(params['confList'])) ]

	####################################################################
	#	Write code here for computing errorArraySource



	####################################################################

	#print source results
	# per confidence level: 1 - confList entry, error totals per type (summed
	# over test points), and the middle list of errorArraySource[i]
	print "printing: confidence level | errors by type | points causing error"
	for i in xrange(len(params['confList'])):
		errorSum = np.sum(errorArray[:,:,i],axis=1)
		print 1-params['confList'][i],errorSum,errorArraySource[i][1]

	# which points at 95% confidence have prediction error
	# Row 1 is 'predError' in the type ordering used above.
	# NOTE(review): index 5 is hard-coded; it assumes params['confList'] has
	# at least six entries and that entry 5 is the 0.05 level -- confirm.
	logical = errorArray[1,:,5] == 1
	size = pValMat.shape  # re-assigned but not used below
	for point in xrange(len(pValMat)):
		if logical[point] == 1:
			print point,pValMat[point,:],scidict['testLabels'][point]

	ep.errorPlot(errorArray,params['confList'])
Пример #3
0
def RS():
    """Run a rescaled-range (R/S) analysis of the 'warner' series and plot it.

    For every window length n in L the series is cut into consecutive,
    non-overlapping windows of n samples (a trailing partial window is
    ignored). In each window the range R of the running sums of the
    mean-removed samples and the spread S returned by ``satndardDeviation``
    are computed. Windows with S == 0 are skipped and R/S is averaged over
    the remaining ones. A straight line is then fitted to log(mean R/S)
    against log(n); its slope is printed as 'alfa' and drawn on the plot.

    Takes no arguments and returns None; the output is the printed slope
    and a matplotlib figure.
    """
    # 0: load the data; L holds the window lengths n to examine
    indexes, X, L = prepareData('files/warner.txt', None, 2)
    N = len(X)    # number of samples
    AVG = []      # final result (mean R/S) for every n

    for n in L:                                              # (1)
        R = []    # range of the running deviation sums, one per window
        S = []    # value from satndardDeviation, one per window
        for start in range(0, N - n + 1, n):                 # (2)
            m = np.average(X[start:start + n])
            # (3) deviations of the window's samples from the window mean
            Y = [X[s] - m for s in range(start, start + n)]
            # Running sums in a single pass; re-summing the growing list
            # for every element would be quadratic in n.
            Z = np.cumsum(Y)
            R.append(Z.max() - Z.min())                      # (6)
            S.append(satndardDeviation(n, Y))                # (4)

        # (7)-(8) R/S per window, skipping windows with zero spread
        R_S = [r / s for r, s in zip(R, S) if s != 0]
        AVG.append(np.average(R_S))                          # (9)

    log_n = np.log(L)
    log_rs = np.log(AVG)

    plt.scatter(log_n, log_rs, s=10)
    plt.title('RS warner')
    plt.ylabel('log((R/S)/n)')
    plt.xlabel('log(n)')
    result = np.polyfit(log_n, log_rs, 1)
    print('alfa = ', result[0])

    plt.text(4, 2.75, '\u03B1 = {}'.format(round(result[0], 2)))
    # Draw the fitted line between the smallest and largest window length.
    x1 = np.log(L[0])
    x2 = np.log(L[-1])
    plt.plot([x1, x2], [result[0] * x1 + result[1], result[0] * x2 + result[1]], 'red')
    plt.show()
Пример #4
0
def DMA():
    """Run the moving-average based (DMA) analysis of the 'warner' series.

    For each window length n in L a trailing moving average of the series
    is built, the root-mean-square gap between the series and that average
    is measured, and finally a straight line is fitted to the log-log
    relation between n and that gap. The fit is printed (slope as 'alfa')
    and drawn on a matplotlib figure.

    Takes no arguments and returns None.
    """
    # 0: load the series X and the window lengths L
    indexes, X, L = prepareData('files/warner.txt', 50, 4)
    N = len(X)

    modifiedStandardDeviation = []
    for n in L:
        # 1: moving average over windows of length n ending at each position;
        # the first n - 1 entries have no full window and keep the copied values
        movingAverage = X.copy()
        for end in range(n - 1, N):
            window_total = 0.0
            for back in range(n):
                window_total += X[end - back]
            movingAverage[end] = window_total / n

        # 2: squared gaps between series and moving average, positions n-1 .. N-2
        # NOTE(review): the last sample (position N-1) is not part of this
        # sum although its moving average is computed -- confirm intended.
        squared_gap_total = 0.0
        for pos in range(n - 1, N - 1):
            gap = X[pos] - movingAverage[pos]
            squared_gap_total += gap * gap
        modifiedStandardDeviation.append(np.sqrt(squared_gap_total / (N - n)))

    # 3: double logarithmic plot
    log_n = np.log(L)
    log_sigma = np.log(modifiedStandardDeviation)
    plt.scatter(log_n, log_sigma, s=20)
    plt.title('DMA warner')
    plt.ylabel(r'log($\sigma_{DMA}(n)$)')
    plt.xlabel('log(n)')

    result = np.polyfit(log_n, log_sigma, 1)
    print('alfa = ', result[0])
    print(result)

    plt.text(1.85, -0.05, '\u03B1 = {}'.format(round(result[0], 2)))
    # Fitted line drawn between the smallest and largest window length.
    slope, intercept = result[0], result[1]
    x1 = np.log(L[0])
    x2 = np.log(L[-1])
    plt.plot([x1, x2], [slope * x1 + intercept, slope * x2 + intercept], 'red')
    plt.show()
# Photometric-stereo driver script: load images, estimate albedo and surface
# normals, integrate a height map and display the results.

# --- configuration ---
subjectName = 'yaleB02'  # choices listed by the author: debug, yaleB01, yaleB02, yaleB05, yaleB07
numImages = 128
writeOutput = True
data_dir = os.path.join('..', 'data')
out_dir = os.path.join('..', 'output', 'photometricStereo')
image_dir = os.path.join(data_dir, 'photometricStereo', subjectName)
integrationMethod = 'random'  # forwarded unchanged to getSurface
mkdir(out_dir)

# --- input: synthetic toy scene for 'debug', otherwise images from image_dir ---
if subjectName == 'debug':
    imageSize = (64, 64)
    # the toy example also returns ground-truth albedo, normals and height map
    (ambientImage, imArray, lightDirs, trueAlbedo, trueSurfaceNormals, trueHeightMap) = toyExample(imageSize, numImages)
else:
    (ambientImage, imArray, lightDirs) = loadFaceImages(image_dir, subjectName, numImages)

# prepareData receives the image stack together with the ambient image
# (presumably to remove ambient light and normalise -- confirm in prepareData)
imArray = prepareData(imArray, ambientImage)

(albedoImage, surfaceNormals) = photometricStereo(imArray, lightDirs)

heightMap = getSurface(surfaceNormals, integrationMethod)

displayOutput(albedoImage, heightMap)

plotSurfaceNormals(surfaceNormals)

# ground truth exists only for the toy example; show it for comparison
if subjectName == 'debug':
    displayOutput(trueAlbedo, trueHeightMap)
    plotSurfaceNormals(trueSurfaceNormals)

if writeOutput:
    # Output path for the albedo image. The statements that would actually
    # write the file are not part of the visible snippet.
    imageName = os.path.join(out_dir, '{}_albedo.jpg'.format(subjectName))
Пример #6
0
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import time

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reserved token indices (presumably start-of-sentence / end-of-sentence
# markers -- confirm against the vocabulary code).
SOS_token = 0
EOS_token = 1

# Length limit; how it is applied is not visible in this snippet.
MAX_LENGTH = 10

# English sentence prefixes (presumably used to filter sentence pairs --
# confirm in prepareData).
# NOTE(review): trailing spaces are inconsistent ("he is" / "she s" have
# none while "he s " has one), which matters for prefix matching -- confirm
# this is intended.
eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", "she s",
                "you are", "you re ", "we are", "we re ", "they are",
                "they re ")

# The star import happens after the constants above are defined; the call
# below runs at import time and prints one randomly chosen pair.
from prepareData import *
input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
print(random.choice(pairs))

# Ratio in [0, 1]; presumably the probability of using teacher forcing
# during training -- the code that reads it is not visible here.
teacher_forcing_ratio = 0.5
Пример #7
0
def DFA():
    """Run a detrended fluctuation analysis (DFA) of the 'warner' series.

    The series is turned into a random-walk profile (running sum of the
    mean-removed samples). For every segment length in L the profile is cut
    into consecutive, non-overlapping segments (a trailing partial segment
    is ignored), a straight line is fitted to each segment, and
    ``calculateF`` is called with that line. The per-segment values are
    averaged, and a straight line is fitted to log(average F) against
    log(segment length). The fit is printed (slope as 'alfa') and drawn on
    a matplotlib figure.

    Takes no arguments and returns None.
    """
    # 0: load the data; L holds the segment lengths to examine
    indexes, D, L = prepareData('./files/warner.txt', None, 2)
    N = len(D)

    # 1: mean of the whole series
    avg = np.average(D)

    # 2: turn the data into a random walk (running sum of deviations)
    randomWalk = []
    cumulative_sum = 0.0
    for i in range(N):
        cumulative_sum += D[i] - avg
        randomWalk.append(cumulative_sum)

    # loop over the segment lengths
    F_avg = []
    for segment_size in L:
        # 3: per-length snapshots of the fit inputs. They are kept because
        # calculateF receives randomWalk itself and its body is not visible
        # here; slicing the snapshots replaces the repeated deletion from
        # the front of the lists, which shifted all remaining items for
        # every segment.
        X = indexes.copy()
        Y = randomWalk.copy()
        k = indexes[0]
        F = []
        for i in range(0, N - segment_size + 1, segment_size):
            stop = i + segment_size
            # 4: straight line through the segment: line[0]=a; line[1]=b
            line = np.polyfit(X[i:stop], Y[i:stop], 1)

            # 5: compute F for this segment
            F.append(calculateF(line, segment_size, i, k, randomWalk))
            k = k + segment_size

        # 6: average fluctuation for this segment length
        F_avg.append(np.average(F))

    # 7: double logarithmic plot
    log_n = np.log(L)
    log_f = np.log(F_avg)
    plt.scatter(log_n, log_f, s=20)
    plt.title('DFA warner')
    plt.ylabel(r'$log(\tilde{F}(n))$')
    plt.xlabel('log(n)')

    result = np.polyfit(log_n, log_f, 1)
    print('alfa = ', result[0])
    print(result)

    plt.text(4, 3.25, '\u03B1 = {}'.format(round(result[0], 2)))
    # Fitted line drawn between the smallest and largest segment length.
    x1 = np.log(L[0])
    x2 = np.log(L[-1])
    plt.plot([x1, x2],
             [result[0] * x1 + result[1], result[0] * x2 + result[1]], 'red')
    plt.show()
Пример #8
0
	target[i] = preData[i][1]

# Driver script (Python 2: print statements, xrange): prepare the data,
# compute the p-value matrix and print one row per test point.

# Bundle the raw arrays under the keys handed to prepareData.
scidict={'data':data,'target':target}

# Run settings; the whole dict is forwarded to prepareData and
# p2mat.computePVal. Only 'cpType' is read directly in this script.
# NOTE(review): the meaning of the numeric codes (cpType, classifier) is
# defined elsewhere -- not visible here.
params={
    'cpType':1,
    'numClasses':2,
    'confList':[.5,.4,.3,.2,.1,.05,.01],
    'trainPortion':.75,
    'calibPortion':.25,
    'classifier':3,
    'flavor':None,
    'k':None,
    }

# NOTE(review): from here on scidict is accessed through attributes
# (scidict.data, scidict.testData, ...), so prepareData must return an
# attribute-style object; a plain dict would raise AttributeError. The
# argument order (scidict, params) also differs from other callers that pass
# (params, scidict) -- confirm against the prepareData in use.
scidict = prepareData(scidict, params)

if params['cpType'] != 2:
	#not inductive
	pValMat = p2mat.computePVal(scidict.data,scidict.testData,scidict.target,scidict.testTarget,params)

else:
	#inductive
	# same call, with the calibration indices passed as an extra argument
	pValMat = p2mat.computePVal(scidict.data,scidict.testData,scidict.target,scidict.testTarget,params,scidict.calibIndices)

# one line per test point: its row of pValMat followed by its test target
for i in xrange(len(pValMat)):
	print pValMat[i,:], scidict.testTarget[i]

#for manual check of results
#for i in xrange(len(pValMat)):
#	print pValMat[i,:],scidict.testTarget[i]
Пример #9
0
from prepareData import *


# Run the preparation step only when this file is executed as a script.
# prepareData is provided by the star import above and is called with no
# arguments.
if __name__ == '__main__':
    prepareData()