Пример #1
0
def readCROHMEdB(locationList):
    """Build the labelled symbol database from folders of CROHME InkML files.

    Steps performed:
      1. Every file whose name contains '.inkml' in the given folders is
         listed, one path per line, in 'toTrain.txt'.
      2. Each listed file is handed to ``sg.main`` with an output prefix
         inside its own freshly created 'segmentedData/Folder000<n>' folder.
      3. For every folder under 'segmentedData', each line of
         'trainFile_GT.txt' is paired with the next symbol file (ordered by
         numeric suffix) and loaded through ``ink2Traces.i2trained``.
      4. All symbols are drawn, preprocessed and have their features
         computed.

    Args:
        locationList: iterable of directory paths to scan for InkML files.

    Returns:
        Tuple ``(psdB, tagClassification)``: the result of
        ``spp.preprocessing`` and a dict mapping every symbol tag to the
        list of preprocessed symbols carrying that tag.

    Raises:
        ValueError: if a segmented folder lacks 'trainFile_GT.txt', holds a
            file that is not named 'trainFile<number>.inkml', or a ground
            truth line has no comma.
        IndexError: if the ground truth has more lines than symbol files.
    """
    sdB = []
    tagClassification = {}

    # Opening with 'w' already truncates an existing list, so the former
    # os.remove() call (which raised when the file was missing, e.g. on the
    # first run) is not needed.
    with open('toTrain.txt', 'w') as trainListFile:
        for destiny in locationList:
            for filenom in ls(destiny):
                if '.inkml' in filenom:
                    trainListFile.write(destiny + '/' + filenom + '\n')

    # Segment each listed file into its own, freshly created folder.
    with open('toTrain.txt', 'r') as trainListFile:
        for lcont, linea in enumerate(trainListFile, 1):
            folder = 'segmentedData/' + 'Folder000' + str(lcont)
            if os.path.exists(folder):
                shutil.rmtree(folder)
            os.makedirs(folder)
            # NOTE(review): linea keeps its trailing newline, exactly as
            # before this rewrite -- confirm sg.main expects that.
            sg.main(['GO', linea, folder + '/trainFile'])

    # Scan the segmented folders and load every symbol with its label.
    prefix = 'trainFile'
    suffix = '.inkml'
    for cases in os.listdir('segmentedData'):
        caseDir = 'segmentedData/' + cases
        filesHere = os.listdir(caseDir)
        filesHere.remove('trainFile_GT.txt')
        # Order symbol files by their numeric suffix, sorting once.  The
        # number is cut out by position: str.strip() removes a *set of
        # characters*, not a prefix/suffix, so it only worked by accident.
        indices = sorted(
            int(name[len(prefix):-len(suffix)]) for name in filesHere)
        filesHere = [prefix + str(number) + suffix for number in indices]
        with open(caseDir + '/trainFile_GT.txt') as Gfile:
            for filco, linea in enumerate(Gfile):
                # The label is everything after the first comma.
                etiq = linea[linea.index(',') + 1:].rstrip('\n')
                sdB.append(
                    ink2Traces.i2trained(caseDir + '/' + filesHere[filco],
                                         etiq))

    # Preprocess the symbols and compute their features.
    for symbol in sdB:
        symbol.draw()
    psdB = spp.preprocessing(sdB)
    for symbol in psdB:
        symbol.computeFeatures()
        tagClassification.setdefault(symbol.tag, []).append(symbol)
    return psdB, tagClassification
Пример #2
0
def readUNIPENdB(location):
    """Build the labelled symbol database from a UNIPEN directory tree.

    Every file found under ``<location>/data/<folder>/aga/`` is loaded with
    ``tds.mountDS(path, 'UNIPEN')``; the resulting symbols are drawn,
    preprocessed and have their features computed.

    Args:
        location: path of the directory expected to contain a 'data' entry.

    Returns:
        Tuple ``(psdB, tagClassification)``: the result of
        ``spp.preprocessing`` and a dict mapping every symbol tag to the
        list of preprocessed symbols carrying that tag.  When ``location``
        has no 'data' entry the result is ``([], {})``.
    """
    tagClassification = {}
    if 'data' not in ls(location):
        # The original fell through to the return statement with both
        # result names unbound, which raised UnboundLocalError.
        return [], tagClassification

    sdB = []
    dataRoot = location + '/data'
    for curFolder in ls(dataRoot):
        for curAltFold in ls(dataRoot + '/' + curFolder):
            # Only the 'aga' sub-folders are read.
            if curAltFold != 'aga':
                continue
            agaDir = dataRoot + '/' + curFolder + '/' + curAltFold
            for curFile in ls(agaDir):
                fullPath = agaDir + '/' + curFile
                # Single-argument parenthesised form prints the same text
                # under Python 2 and Python 3.
                print(fullPath + ':')
                # extend() avoids copying the whole list for every file.
                sdB.extend(tds.mountDS(fullPath, 'UNIPEN'))

    for symbol in sdB:
        symbol.draw()
    psdB = spp.preprocessing(sdB)
    for symbol in psdB:
        symbol.computeFeatures()
        tagClassification.setdefault(symbol.tag, []).append(symbol)
    print(type(tagClassification))
    return psdB, tagClassification
Пример #3
0
def readUNIPENdB(location):
    """Load, preprocess and classify by tag the symbols of a UNIPEN tree.

    The files read are those located in ``<location>/data/<folder>/aga/``;
    each one is loaded through ``tds.mountDS(path, 'UNIPEN')``.  The loaded
    symbols are then drawn, preprocessed and have their features computed.

    Args:
        location: path of the directory expected to contain a 'data' entry.

    Returns:
        Tuple ``(psdB, tagClassification)``: the result of
        ``spp.preprocessing`` and a dict mapping every symbol tag to the
        list of preprocessed symbols carrying that tag.  ``([], {})`` is
        returned when ``location`` has no 'data' entry.
    """
    tagClassification = {}
    if 'data' not in ls(location):
        # Without this early return both result names were unbound at the
        # return statement (UnboundLocalError).
        return [], tagClassification

    sdB = []
    dataRoot = location + '/data'
    for curFolder in ls(dataRoot):
        for curAltFold in ls(dataRoot + '/' + curFolder):
            # Only the 'aga' sub-folders are read.
            if curAltFold != 'aga':
                continue
            agaDir = dataRoot + '/' + curFolder + '/' + curAltFold
            for curFile in ls(agaDir):
                fullPath = agaDir + '/' + curFile
                # Parenthesised single-argument print behaves identically
                # under Python 2 and Python 3.
                print(fullPath + ':')
                # In-place extend instead of rebuilding the list each time.
                sdB.extend(tds.mountDS(fullPath, 'UNIPEN'))

    for symbol in sdB:
        symbol.draw()
    psdB = spp.preprocessing(sdB)
    for symbol in psdB:
        symbol.computeFeatures()
        tagClassification.setdefault(symbol.tag, []).append(symbol)
    print(type(tagClassification))
    return psdB, tagClassification
Пример #4
0
def readCROHMEdB(locationList):
    """Segment CROHME InkML files and return their labelled symbols.

    The function writes the path of every file whose name contains '.inkml'
    (one per line) to 'toTrain.txt', calls ``sg.main`` on each of them with
    an output prefix inside a freshly created 'segmentedData/Folder000<n>'
    folder, then walks every folder under 'segmentedData' pairing each line
    of 'trainFile_GT.txt' with the next symbol file (ordered by numeric
    suffix).  Finally all symbols are drawn, preprocessed and have their
    features computed.

    Args:
        locationList: iterable of directory paths to scan for InkML files.

    Returns:
        Tuple ``(psdB, tagClassification)``: the result of
        ``spp.preprocessing`` and a dict mapping every symbol tag to the
        list of preprocessed symbols carrying that tag.

    Raises:
        ValueError: if a segmented folder lacks 'trainFile_GT.txt', holds a
            file that is not named 'trainFile<number>.inkml', or a ground
            truth line has no comma.
        IndexError: if the ground truth has more lines than symbol files.
    """
    sdB = []
    tagClassification = {}

    # 'w' mode truncates any previous list; the former os.remove() call
    # raised when 'toTrain.txt' did not exist yet, so it is dropped.
    with open('toTrain.txt', 'w') as trainListFile:
        for destiny in locationList:
            for filenom in ls(destiny):
                if '.inkml' in filenom:
                    trainListFile.write(destiny + '/' + filenom + '\n')

    # One output folder per listed file, recreated from scratch.
    with open('toTrain.txt', 'r') as trainListFile:
        for lcont, linea in enumerate(trainListFile, 1):
            folder = 'segmentedData/' + 'Folder000' + str(lcont)
            if os.path.exists(folder):
                shutil.rmtree(folder)
            os.makedirs(folder)
            # NOTE(review): linea is passed with its trailing newline, as
            # the original did -- confirm sg.main expects that.
            sg.main(['GO', linea, folder + '/trainFile'])

    # Scan the segmented folders and load every symbol with its label.
    prefix = 'trainFile'
    suffix = '.inkml'
    for cases in os.listdir('segmentedData'):
        caseDir = 'segmentedData/' + cases
        filesHere = os.listdir(caseDir)
        filesHere.remove('trainFile_GT.txt')
        # Sort once by numeric suffix.  The number is sliced out by
        # position because str.strip() takes a set of characters rather
        # than a prefix/suffix.
        indices = sorted(
            int(name[len(prefix):-len(suffix)]) for name in filesHere)
        filesHere = [prefix + str(number) + suffix for number in indices]
        with open(caseDir + '/trainFile_GT.txt') as Gfile:
            for filco, linea in enumerate(Gfile):
                # The label is everything after the first comma.
                etiq = linea[linea.index(',') + 1:].rstrip('\n')
                sdB.append(
                    ink2Traces.i2trained(caseDir + '/' + filesHere[filco],
                                         etiq))

    # Preprocess the symbols and compute their features.
    for symbol in sdB:
        symbol.draw()
    psdB = spp.preprocessing(sdB)
    for symbol in psdB:
        symbol.computeFeatures()
        tagClassification.setdefault(symbol.tag, []).append(symbol)
    return psdB, tagClassification
Пример #5
0
# Group every tag under its base symbol (the tag without its last character),
# keeping the tag and its number of samples.
genTags = {}
for character in tagClassification:
    genTags.setdefault(character[:-1], []).append(
        [character, len(tagClassification[character])])

# Drop every variant holding less than 5% of the samples of its base symbol.
for onlySym in genTags:
    variants = genTags[onlySym]
    # The total does not change inside the loop, so compute it once.
    symTotal = sum(entry[1] for entry in variants)
    for variantTag, variantCount in variants:
        if variantCount < 0.05 * symTotal:
            del tagClassification[variantTag]
            del average[variantTag]

filenom = 'algb02.inkml'
Coord = ink2Traces.i2t(filenom)  # Extract the coordinates given by the file
img, byAxis, difs = drawTraces.draw(Coord)  # Show the result and build the image
Symb, groupedStrokes = fileSeg.segment(Coord, byAxis, difs)  # Group traces into symbols
Symb = drawRegions.drawS(Symb)
symbol = spp.preprocessing([Symb[1]])
symbol[0].computeFeatures()
# Debug aid kept from the original:
#pprint(vars(symbol[0]))
#for i in range(len(symbol[0].LP)):
#	print str(symbol[0].LP[i])+', '
auxAverage = copy.deepcopy(average)
auxAverage['21'].computeFeatures()
auxAverage['r1'].computeFeatures()

# Opening with 'w' truncates an existing report; the former os.remove() call
# raised when 'features.txt' did not exist yet, so it is dropped.
# The handle stays open: this fragment does not show where reporting ends.
report = open('features.txt', 'w')
report.write('||||||||||||||||||||||||||||||||||||||||||||||||||||||\n')
report.write('LP:\n')
# One line per sample of tag '21', listing its LP values.
for sample in tagClassification['21']:
    for lpValue in sample.LP:
        report.write(str(lpValue) + ', ')
    report.write('\n')
Пример #6
0
# Remove every tag variant that holds less than 5% of the samples recorded
# for its base symbol, from both tagClassification and average.
for onlySym in genTags:
    variants = genTags[onlySym]
    # Loop-invariant: the entries of genTags are not modified below.
    symTotal = sum(entry[1] for entry in variants)
    for variantTag, variantCount in variants:
        if variantCount < 0.05 * symTotal:
            del tagClassification[variantTag]
            del average[variantTag]

filenom = 'algb02.inkml'
Coord = ink2Traces.i2t(filenom)  # Extract the coordinates given by the file
# Show the result and build the image
img, byAxis, difs = drawTraces.draw(Coord)
# Group traces into symbols
Symb, groupedStrokes = fileSeg.segment(Coord, byAxis, difs)
Symb = drawRegions.drawS(Symb)
symbol = spp.preprocessing([Symb[1]])
symbol[0].computeFeatures()
# Debug aid kept from the original:
#pprint(vars(symbol[0]))
#for i in range(len(symbol[0].LP)):
#	print str(symbol[0].LP[i])+', '
auxAverage = copy.deepcopy(average)
auxAverage['21'].computeFeatures()
auxAverage['r1'].computeFeatures()

# 'w' mode truncates an existing report, so the former os.remove() call --
# which raised when 'features.txt' was missing -- is not needed.
# The handle stays open: this fragment does not show where reporting ends.
report = open('features.txt', 'w')
report.write('||||||||||||||||||||||||||||||||||||||||||||||||||||||\n')
report.write('LP:\n')
# One line per sample of tag '21', listing its LP values.
for sample in tagClassification['21']:
    for lpValue in sample.LP:
        report.write(str(lpValue) + ', ')
    report.write('\n')
Пример #7
0
from pprint import pprint
import elasticMatching as eM
import pytemplate as temp
import featurePonderation as fp
import pyStructural as stru

# Whole pipeline: reads the name of the InkML file to analyse from the command
# line and returns the expression (e.g. overAll.py fileToClassify.inkml).
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit('Usage: %s <file.inkml>' % sys.argv[0])
filenom = sys.argv[1]  # InkML file name given on the command line
Coord = ink2Traces.i2t(filenom)  # Extract the coordinates given by the file
# Show the result and build the image
img, byAxis, difs = drawTraces.draw(Coord)
# Group traces into symbols
Symb, groupedStrokes = fileSeg.segment(Coord, byAxis, difs)
# Find the bounding box and the centre of every symbol
Symb = drawRegions.drawS(Symb)
Symb = spp.preprocessing(Symb)  # Preprocess all the symbols
# Single-argument parenthesised print works under Python 2 and Python 3.
print('Computing features..........')
for currentSymbol in Symb:
    currentSymbol.computeFeatures()  # Compute the features of every symbol
print('Features extracted.')
# Read the database: all labelled symbols, all samples grouped by character
# (label) and the template of every character.
symboldB, tagClassification, averages = temp.readTemplate()
# Remove noisy components from the database: find the independent meaning of
# every character (the tag without its last character); when the share of a
# character within that meaning is very low it is treated as noise and removed.
genTags = {}
for character in tagClassification:
    genTags.setdefault(character[:-1], []).append(
        [character, len(tagClassification[character])])
for onlySym in genTags: