Exemplo n.º 1
0
def extractDataFilesAndFeatures(initialFolderpath, finalFolderPath, resultFolderPath):
    """Extract per-user text files and three feature CSV files from a folder.

    Each file name returned by ``getFilesFromFolder(initialFolderpath)`` is
    split on ``'.'`` and its first five parts are read, in order, as
    ``userID``, ``gender``, ``age``, ``job`` and ``place``.  For every file:

    * the text returned by ``extractTextAndFeatures`` is saved as
      ``<finalFolderPath>/<userID>.txt``;
    * one row is added to each of the textual, temporal and class CSVs.

    After all files are processed the CSVs are written to
    ``resultFolderPath`` as ``textualFeature.csv``, ``temporalFeature.csv``
    and ``classFeature.csv``.  Each starts with its header row and every
    row (header included) is terminated by ``'\\n'``.

    Args:
        initialFolderpath: Folder holding the input files (no trailing slash
            is needed; ``'/'`` is inserted when building paths).
        finalFolderPath: Folder receiving the per-user ``.txt`` files.
        resultFolderPath: Folder receiving the three CSV files.

    Raises:
        IndexError: If a file name has fewer than five ``'.'``-separated parts.
        ValueError: If the age part of a file name cannot be parsed by
            ``float``.
    """
    # Rows are collected in lists and joined once at the end; repeatedly
    # concatenating onto an ever-growing string copies it on every iteration.
    textualLines = [csvLineMaker(getTextualHeaderFeatures())]
    temporalLines = [csvLineMaker(getTemporalHeaderFeatures())]
    classLines = [csvLineMaker(getClassHeaderFeatures())]

    for fileName in getFilesFromFolder(initialFolderpath):
        content, textFeatures, temporalFeatures = extractTextAndFeatures(
            initialFolderpath + '/' + fileName)

        # str.split already yields strings, so no str() conversion is needed.
        info = fileName.split('.')
        userID, gender, age, job, place = (
            info[0], info[1], info[2], info[3], info[4])
        ageClass = str(extractAgeClass(float(age)))

        saveFile(finalFolderPath + '/' + userID + '.txt', content)

        textualLines.append(csvLineMaker(textFeatures))
        # Column order of the class row: userID, age, gender, job, place, ageClass.
        classLines.append(
            csvLineMaker([userID, age, gender, job, place, ageClass]))
        temporalLines.append(csvLineMaker(temporalFeatures))

    saveFile(resultFolderPath + '/textualFeature.csv',
             ''.join(line + '\n' for line in textualLines))
    saveFile(resultFolderPath + '/temporalFeature.csv',
             ''.join(line + '\n' for line in temporalLines))
    saveFile(resultFolderPath + '/classFeature.csv',
             ''.join(line + '\n' for line in classLines))

    
    
Exemplo n.º 2
0
def tenFoldSplitDir(src_path, dst_path):
    """Copy the files of ``src_path`` into ten train/test directory pairs.

    The file list comes from ``getFilesFromFolder(src_path)``.  For each of
    the folds produced by ``KFold`` (numbered from 1), the files selected by
    the train indices are copied to ``<dst_path>/train-set<k>/`` and those
    selected by the test indices to ``<dst_path>/test-set<k>/``.  Missing
    destination directories are created.

    Args:
        src_path: Folder containing the files to split.  A trailing
            separator is optional.
        dst_path: Folder under which the ``train-set<k>`` / ``test-set<k>``
            directories are created.  A trailing separator is optional.
    """
    files = getFilesFromFolder(src_path)
    # NOTE(review): KFold(n, n_folds=...) iterated directly looks like the
    # legacy scikit-learn cross_validation API -- confirm against the import.
    kf = KFold(len(files), n_folds = 10)

    for dir_iter, (train, test) in enumerate(kf, 1):
        # os.path.join inserts a separator only when one is missing, so
        # slash-terminated arguments give the same paths as plain
        # concatenation.  The trailing "" keeps the final separator on the
        # destination directories.
        train_dest = os.path.join(dst_path, "train-set" + str(dir_iter), "")
        test_dest = os.path.join(dst_path, "test-set" + str(dir_iter), "")

        for dest in (train_dest, test_dest):
            if not os.path.exists(dest):
                os.makedirs(dest)

        # presumably `copy` is shutil.copy (file -> directory); verify import.
        for train_index in train:
            copy(os.path.join(src_path, files[train_index]), train_dest)
        for test_index in test:
            copy(os.path.join(src_path, files[test_index]), test_dest)