Exemplo n.º 1
0
class NetworkDataUtil(object):
    '''
    Class to load and separate data for a neural network.

    Two CSV files are read (one per class). Rows of the first file are
    labelled with class 0., rows of the second file with class 1.; the
    labelled rows are then turned into datasets via createData.
    '''

    def __init__(self, files=None):
        '''
        :param files: sequence holding the paths of the two CSV files;
                      files[0] supplies the class 0 rows, files[1] the
                      class 1 rows
        '''
        # A fresh list per instance: a literal [] default would be shared
        # between all instances created without arguments.
        self.files = [] if files is None else files
        self.fileUtil = CSVUtil()
        # Number of input columns; filled in by buildTestSet / buildFullTestSet.
        self.nInputs = None

    def makeSameLength(self, values0, values1):
        '''
        Truncates both sequences to the length of the shorter one.

        :return: tuple (values0, values1) of equal length
        '''
        length = min(len(values0), len(values1))
        return values0[:length], values1[:length]

    def get(self, separate=True, makeSameLength=True):
        '''
        Reads both files and builds the dataset(s).

        :param separate: if True return (trainData, testData), otherwise one
                         dataset containing all rows
        :param makeSameLength: if True both classes are cut to the same
                               number of rows before building the datasets
        '''
        values0, values1 = self.readFiles(self.files)
        if makeSameLength and (len(values0) != len(values1)):
            values0, values1 = self.makeSameLength(values0, values1)

        if separate:
            return self.buildTestSet(values0, values1)
        return self.buildFullTestSet(values0, values1)

    def getNInput(self):
        '''
        Returns the number of input columns of the last built dataset
        (None as long as no dataset has been built).
        '''
        return self.nInputs

    def readFiles(self, files):
        '''
        Reads the first two entries of files as comma separated data.

        :return: tuple (data of files[0], data of files[1])
        '''
        values0 = self.fileUtil.readData(files[0], ",")
        values1 = self.fileUtil.readData(files[1], ",")
        return values0, values1

    def buildFullTestSet(self, values0, values1):
        '''
        Labels both value sets, merges and shuffles them and returns a
        single dataset containing every row.
        '''
        values0 = self._addClass(values0, 0.)
        values1 = self._addClass(values1, 1.)
        rawData = self._postprocData(values0, values1)
        # the last column holds the class label, everything before is input
        self.nInputs = len(rawData[0]) - 1

        return self.createData(self.nInputs, rawData)

    def buildTestSet(self, values0, values1):
        '''
        Splits each class into a training part (first two thirds) and a
        test part (rest) and returns (trainData, testData).
        '''
        train0, test0 = self._preprocData(values0, 0.)
        train1, test1 = self._preprocData(values1, 1.)

        trainRawData = self._postprocData(train0, train1)
        testRawData = self._postprocData(test0, test1)
        # the last column holds the class label, everything before is input
        self.nInputs = len(trainRawData[0]) - 1

        trainData = self.createData(self.nInputs, trainRawData)
        testData = self.createData(self.nInputs, testRawData)
        return trainData, testData

    def _preprocData(self, values, clazz):
        '''
        Shuffles values, appends the class column and splits the result
        into (train, test).

        NOTE: np.random.shuffle works in place, so the array passed in is
        reordered as a side effect.
        '''
        np.random.shuffle(values)
        values0 = self._addClass(values, clazz)
        return self._separateData(values0)

    def _addClass(self, values, clazz):
        '''
        Returns a copy of the 2-D array values with one extra trailing
        column filled with clazz.
        '''
        shape = values.shape
        clazzArray = np.full((shape[0], shape[1] + 1), clazz)
        clazzArray[:, :-1] = values
        return clazzArray

    def _separateData(self, values):
        '''
        Splits values into the first two thirds and the remaining rows.

        :return: tuple (values[:d], values[d:]) with d = 2 * len(values) // 3
        '''
        # floor division keeps the index an int on Python 2 and Python 3;
        # a float index would make the slice raise a TypeError
        d = 2 * len(values) // 3
        return values[0:d], values[d:]

    def _postprocData(self, values0, values1):
        '''
        Concatenates both arrays row-wise and shuffles the rows.
        '''
        values = np.concatenate((values0, values1), axis=0)
        np.random.shuffle(values)
        return values

    def createXORData(self):
        '''
        Returns a dataset with the four rows of the XOR truth table
        (two inputs, one target).
        '''
        values = [[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0]]
        return self.createData(2, values)

    def createData(self, nInput, values):
        '''
        Builds a SupervisedDataSet from rows of values.

        :param nInput: number of leading columns used as input; the column
                       at index nInput is used as target
        :param values: iterable of rows
        '''
        ds = SupervisedDataSet(nInput, N_OUTPUT)
        for value in values:
            ds.addSample(value[:nInput], value[nInput])
        return ds
Exemplo n.º 2
0
class CSVUtilTest(BaseTest):
    '''
    Tests for CSVUtil: reading data, header, EEG and ECG files, writing
    files and transforming timestamps.
    '''

    def setUp(self):
        self.reader = CSVUtil()

    def test_readData(self):
        # smoke test: reading the file must not raise
        file_path = self.getData32CSV()
        self.reader.readData(file_path)

    def test_readHeader(self):
        # smoke test: reading the header must not raise
        file_path = self.getData32CSV()
        self.reader.readHeader(file_path)

    def testreadEEGFile(self):
        # smoke test: reading the EEG file must not raise
        file_path = self.getData32CSV()
        self.reader.readEEGFile(file_path)

    def test_writeFile(self):
        '''Written data must be readable again with a tolerance of 0.001.'''
        filePath = self.PATH + "test.csv"
        header = ["Timestamp", "B", "C"]
        data = np.array([[1, 1.123456789, 2], [2, -4.123456789, 6],
                         [3, 7.123456789, 99.123]])
        try:
            self.reader.writeFile(filePath, data, header)
            # fail loudly: a missing file must not let the test pass silently
            self.assertTrue(isfile(filePath))

            read = self.reader.readEEGFile(filePath)
            for (i, j), expected in np.ndenumerate(data):
                self.assertAlmostEqual(expected, read.data[i, j],
                                       delta=0.001)
        finally:
            # clean up even if an assertion above failed
            self.removeFile(filePath)

    def test_writeStructredFile(self):
        '''The "value" list of every key must be readable as a column.'''
        filePath = self.PATH + "test_structured.csv"
        data = {
            "A": {
                "value": [1, 2, 3],
                "quality": [-1, -1, -1]
            },
            "B": {
                "value": [4, 5, 6],
                "quality": [-2, -2, -2]
            },
            "C": {
                "value": [7, 8, 9],
                "quality": [-3, -3, -3]
            }
        }
        try:
            self.reader.writeStructredFile(filePath, data)
            # fail loudly: a missing file must not let the test pass silently
            self.assertTrue(isfile(filePath))

            read = self.reader.readEEGFile(filePath)
            # items() is available on Python 2 and Python 3
            for key, values in data.items():
                assert_array_equal(values["value"], read.getColumn(key))
        finally:
            # clean up even if an assertion above failed
            self.removeFile(filePath)

    @unittest.skip("There should be no empty values")
    def test_readEEGFile_NaNValues(self):
        eegData = self.reader.readEEGFile(self.PATH + "example_32_empty.csv")
        emptyCol = eegData.getColumn("Y")
        self.assertTrue(np.isnan(emptyCol).any())

        nonEmptyCol = eegData.getColumn("F3")
        self.assertFalse(np.isnan(nonEmptyCol).any())

    def test_readEEGFile_SeparatorFallback(self):
        '''Column "F3" must be identical in both example files.'''
        eegData = self.reader.readEEGFile(self.getData32CSV())
        semicolonData = eegData.getColumn("F3")

        eegData = self.reader.readEEGFile(self.PATH + "example_32_comma.csv")
        commaData = eegData.getColumn("F3")

        self.assertTrue((semicolonData == commaData).all())

    @unittest.skip("delete Z-Column leads to memory error")
    def test_readEEGFile_newStyle(self):
        _ = self.reader.readEEGFile(self.PATH + "example_1024_new.csv")

    def test_readEEGFile(self):
        self.eegData = self.reader.readEEGFile(self.getData32CSV())
        self.assertTrue(self.eegData.hasEEGData)
        self.assertFalse(self.eegData.hasECGData)

    def test_readECGFile(self):
        self.ecgData = self.reader.readECGFile(self.PATH +
                                               "example_4096_ecg.csv")
        self.assertFalse(self.ecgData.hasEEGData)
        self.assertTrue(self.ecgData.hasECGData)

    def test_transformTimestamp_ecg(self):
        '''First transformed timestamp must be within 1 of 1480942739.'''
        header = ["Timestamp", "ECG"]
        data = np.array([["05/12/2016 13:58:59.407", "3798"],
                         ["05/12/2016 13:58:59.408", "3798"],
                         ["05/12/2016 13:58:59.409", "3798"],
                         ["05/12/2016 13:58:59.410", "3798"],
                         ["05/12/2016 13:58:59.411", "3798"]])
        ecgData = self.reader.transformTimestamp(header, data)
        # NOTE(review): expected value presumably depends on the local
        # time zone of the machine running the test - confirm
        self.assertAlmostEqual(float(ecgData[0][0]), 1480942739., delta=1.)

    def test_transformTimestamp_eeg(self):
        '''First transformed timestamp must be within 1 of 1482131918.'''
        header = ["Timestamp", "F3", "X"]
        data = np.array([["2016-12-19 08:18:38.415000", "-3200", "0"],
                         ["2016-12-19 08:18:38.423000", "-3171", "0"],
                         ["2016-12-19 08:18:38.430000", "-3184", "0"],
                         ["2016-12-19 08:18:38.438000", "-3176", "0"],
                         ["2016-12-19 08:18:38.446000", "-3172", "0"]])
        eegData = self.reader.transformTimestamp(header, data)
        # NOTE(review): expected value presumably depends on the local
        # time zone of the machine running the test - confirm
        self.assertAlmostEqual(float(eegData[0][0]), 1482131918., delta=1.)