Пример #1
0
    def test_index(self):
        """Check that rFunctions.index reports position 1 for column 'test2'.

        The lookup is exercised on an R data frame and on an R matrix that
        hold the same values.
        """
        expectedIndex = 1
        columnNames = ('test1', 'test2', 'test3')

        # --- data frame ---
        # note that test1 has 12 in row 1 and row 2
        frameColumns = {
            'test1': R.IntVector((12, 12, 15)),
            'test2': R.IntVector((32, 4, 12)),
            'test3': R.IntVector((3, 12, 26)),
        }
        testFrame = R.DataFrame(frameColumns)

        # The dict the data frame is built from is not ordered, so the columns
        # are reordered first to pin 'test2' to a fixed position. This relies
        # on the very index function under test, which may or may not be the
        # right thing to do.
        # NOTE(review): order() yields the permutation that sorts the
        # positions; presumably that equals the wanted column selection only
        # when the columns are already sorted or merely swapped -- confirm.
        columnPositions = R.IntVector(
            [rFunctions.index(testFrame, name) for name in columnNames])
        columnOrder = R.r['with'](testFrame, R.r['order'](columnPositions))
        testFrame = testFrame.rx[True, columnOrder]

        actualDataFrameIndex = rFunctions.index(testFrame, 'test2')

        # --- matrix (same values as the data frame) ---
        matrixValues = R.IntVector([12, 12, 15, 32, 4, 12, 3, 12, 26])
        testMatrix = R.r.matrix(matrixValues, nrow=3)
        testMatrix.colnames = R.StrVector(['test1', 'test2', 'test3'])
        actualMatrixIndex = rFunctions.index(testMatrix, 'test2')

        self.assertEqual(expectedIndex, actualDataFrameIndex)
        self.assertEqual(expectedIndex, actualMatrixIndex)
Пример #2
0
    def test_getColumnsException(self):
        """Check that rFunctions.getColumns raises TypeError on bad arguments.

        Covered calls: only a data frame and no column argument, an integer
        mixed in with a column name (in either position), and a plain nested
        Python list passed instead of an R data frame.
        """
        frameColumns = {
            'test1': R.IntVector((12, 15)),
            'test2': R.IntVector((32, 12)),
            'test3': R.IntVector((3, 26)),
        }
        # data frame under test (built once; the original built it twice)
        testFrame = R.DataFrame(frameColumns)

        # The dict the data frame is built from is not ordered, so the columns
        # are reordered first to make sure that 'test2' is always at the same
        # position.
        columnPositions = R.IntVector([
            rFunctions.index(testFrame, name)
            for name in ('test1', 'test2', 'test3')
        ])
        columnOrder = R.r['with'](testFrame, R.r['order'](columnPositions))
        testFrame = testFrame.rx[True, columnOrder]

        # no column argument at all
        with self.assertRaises(TypeError):
            rFunctions.getColumns(testFrame)
        # an integer after a valid column name
        with self.assertRaises(TypeError):
            rFunctions.getColumns(testFrame, 'test1', 1)
        # an integer before a valid column name
        with self.assertRaises(TypeError):
            rFunctions.getColumns(testFrame, 1, 'test1')
        # a nested Python list in place of the data frame
        with self.assertRaises(TypeError):
            rFunctions.getColumns([[1, 2, 3], [3, 2, 2], [4, 3, 2]], 'test1')
Пример #3
0
    def test_index(self):
        """Check that rFunctions.index reports position 1 for column 'test2'.

        Both an R data frame and an R matrix with the same values are probed.
        """
        expectedIndex = 1

        # data frame under test; note that test1 has 12 in row 1 and row 2
        frameColumns = {
            'test1': R.IntVector((12, 12, 15)),
            'test2': R.IntVector((32, 4, 12)),
            'test3': R.IntVector((3, 12, 26)),
        }
        testFrame = R.DataFrame(frameColumns)

        # The source dict is not ordered, so the columns are reordered first
        # to keep 'test2' at a fixed position. Doing so uses the index
        # function that is being tested, which may or may not be appropriate.
        positions = [
            rFunctions.index(testFrame, columnName)
            for columnName in ('test1', 'test2', 'test3')
        ]
        sortedOrder = R.r['with'](testFrame,
                                  R.r['order'](R.IntVector(positions)))
        testFrame = testFrame.rx[True, sortedOrder]

        actualDataFrameIndex = rFunctions.index(testFrame, 'test2')

        # matrix under test (same values as the data frame)
        testMatrix = R.r.matrix(
            R.IntVector([12, 12, 15, 32, 4, 12, 3, 12, 26]),
            nrow=3,
        )
        testMatrix.colnames = R.StrVector(['test1', 'test2', 'test3'])
        actualMatrixIndex = rFunctions.index(testMatrix, 'test2')

        self.assertEqual(expectedIndex, actualDataFrameIndex)
        self.assertEqual(expectedIndex, actualMatrixIndex)
Пример #4
0
    def test_indexException(self):
        """Check that rFunctions.index raises KeyError for an unknown column."""
        # data frame under test; note that test1 has 12 in row 1 and row 2
        frameColumns = {
            'test1': R.IntVector((12, 12, 15)),
            'test2': R.IntVector((32, 4, 12)),
            'test3': R.IntVector((3, 12, 26)),
        }
        testFrame = R.DataFrame(frameColumns)

        # The source dict is not ordered, so the columns are reordered first
        # to keep 'test2' at a fixed position. Doing so uses the index
        # function that is being tested, which may or may not be appropriate.
        positions = R.IntVector([
            rFunctions.index(testFrame, columnName)
            for columnName in ('test1', 'test2', 'test3')
        ])
        testFrame = testFrame.rx[True, R.r['with'](testFrame,
                                                   R.r['order'](positions))]

        # an existing column is looked up first; the result itself is unused
        rFunctions.index(testFrame, 'test2')

        with self.assertRaises(KeyError):
            rFunctions.index(testFrame, 'column_notindf')
Пример #5
0
    def test_boxplotFormulae(self):
        """Check that self.plots.boxplotFormulae writes 'boxplotFormulae.png'.

        The plot is fed the '# precursors' column and the rounded log10
        intensity of the merged feature / precursor data; the test fails when
        the png is missing afterwards.
        """
        outputPng = testFolder + 'boxplotFormulae.png'

        # drop a stale output file so it cannot make the test pass by accident
        if os.path.exists(outputPng):
            os.remove(outputPng)

        featDataframe = rFunctions.readCsvFile(testFolder + 'feature.csv')
        featDataframeUniq = rFunctions.getRowsWithUniqColumn(
            featDataframe, 'id')
        precursorPerFeatureDataframe = rFunctions.readCsvFile(
            testFolder + 'feature_precursor.csv', head=True, sep='\t')
        mergedFeatureDataframe = R.r['merge'](featDataframeUniq,
                                              precursorPerFeatureDataframe)

        # replace the merged intensity column by the rounded log10 intensity
        # NOTE(review): the log is taken from featDataframeUniq but stored in
        # the merged frame -- presumably their rows line up; confirm.
        uniqIntensity = featDataframeUniq[rFunctions.index(
            featDataframeUniq, 'intensity')]
        roundedLogIntensity = R.r['round'](rFunctions.takeLog(
            uniqIntensity, 10))
        mergedIntensityIndex = rFunctions.index(mergedFeatureDataframe,
                                                'intensity')
        mergedFeatureDataframe[mergedIntensityIndex] = roundedLogIntensity

        precursorIndex = rFunctions.index(mergedFeatureDataframe,
                                          'X..precursors')
        vector1 = mergedFeatureDataframe[precursorIndex]
        intensityIndex = rFunctions.index(mergedFeatureDataframe, 'intensity')
        vector2 = mergedFeatureDataframe[intensityIndex]

        self.plots.boxplotFormulae(
            outputPng,
            vector1,
            vector2,
            mergedFeatureDataframe,
            title='MS/MS per feature per intensity',
            ylab='# of MS/MS per feature',
            xlab='Rounded log10 of intensity',
        )
        R.r['dev.off']()

        # the plot must exist by now, otherwise the test fails
        plotWritten = os.path.exists(outputPng)
        if not plotWritten:
            self.fail(
                testFolder +
                'boxplotFormulae.png does not exist. File not written out correctly'
            )
        # clean up the generated plot
        if os.path.exists(outputPng):
            os.remove(outputPng)
Пример #6
0
    def test_getColumns(self):
        """Check that rFunctions.getColumns returns only the requested columns.

        'test1' and 'test3' are requested from a three-column data frame; the
        result is compared, column by column, with a data frame built from
        just those two vectors.
        """
        # expected result: the same vectors as in testDict, minus 'test2'
        expectedDict = {
            'test1': R.IntVector((12, 12, 15)),
            'test3': R.IntVector((3, 12, 26)),
        }
        expectedSubset = R.DataFrame(expectedDict)

        testDict = {
            'test1': R.IntVector((12, 12, 15)),
            'test2': R.IntVector((32, 4, 12)),
            'test3': R.IntVector((3, 12, 26)),
        }
        # data frame under test
        testFrame = R.DataFrame(testDict)
        # The dict the data frame is built from is not ordered, so the columns
        # are reordered first to make sure that 'test2' is always at the same
        # position.
        columnPositions = R.IntVector([
            rFunctions.index(testFrame, name)
            for name in ('test1', 'test2', 'test3')
        ])
        columnOrder = R.r['with'](testFrame, R.r['order'](columnPositions))
        testFrame = testFrame.rx[True, columnOrder]

        # selecting columns keeps every row, so all three values of each
        # column are expected back
        actualDfSubset = rFunctions.getColumns(testFrame, 'test1', 'test3')

        # Two data frames can't be compared directly, so each column is
        # flattened to [name, value, value, ...]. Every row is included: the
        # earlier version compared only the first two rows, so a wrong third
        # row went unnoticed.
        expectedResultList = [
            [column[0]] + list(column[1])
            for column in expectedSubset.iteritems()
        ]
        actualResultList = [
            [column[0]] + list(column[1])
            for column in actualDfSubset.iteritems()
        ]
        self.assertEqual(expectedResultList, actualResultList)
Пример #7
0
    def test_boxplotFormulae(self):
        """Check that self.plots.boxplotFormulae writes 'boxplotFormulae.png'.

        The test fails when the png does not exist after plotting; the file
        is removed again at the end.
        """
        plotPath = testFolder + 'boxplotFormulae.png'

        # remove a leftover output file before plotting
        if os.path.exists(plotPath):
            os.remove(plotPath)

        featDataframe = rFunctions.readCsvFile(testFolder + 'feature.csv')
        featDataframeUniq = rFunctions.getRowsWithUniqColumn(featDataframe, 'id')
        precursorPerFeatureDataframe = rFunctions.readCsvFile(
            testFolder + 'feature_precursor.csv', head=True, sep='\t')
        mergedFeatureDataframe = R.r['merge'](featDataframeUniq, precursorPerFeatureDataframe)

        # overwrite the merged intensity column with the rounded log10 values
        # NOTE(review): the values come from featDataframeUniq, not from the
        # merged frame -- presumably the rows correspond; confirm.
        sourceIntensity = featDataframeUniq[rFunctions.index(featDataframeUniq, 'intensity')]
        roundedLog = R.r['round'](rFunctions.takeLog(sourceIntensity, 10))
        targetColumn = rFunctions.index(mergedFeatureDataframe, 'intensity')
        mergedFeatureDataframe[targetColumn] = roundedLog

        vector1 = mergedFeatureDataframe[rFunctions.index(mergedFeatureDataframe, 'X..precursors')]
        vector2 = mergedFeatureDataframe[rFunctions.index(mergedFeatureDataframe, 'intensity')]

        self.plots.boxplotFormulae(
            plotPath, vector1, vector2, mergedFeatureDataframe,
            title='MS/MS per feature per intensity',
            ylab='# of MS/MS per feature',
            xlab='Rounded log10 of intensity')
        R.r['dev.off']()

        # a missing output file means the plot was not written
        if not os.path.exists(plotPath):
            self.fail(testFolder+'boxplotFormulae.png does not exist. File not written out correctly')
        # remove the plot again
        if os.path.exists(plotPath):
            os.remove(plotPath)
Пример #8
0
    def test_indexException(self):
        """Check that rFunctions.index raises KeyError for an unknown column."""
        columnNames = ('test1', 'test2', 'test3')

        # data frame under test; note that test1 has 12 in row 1 and row 2
        frameColumns = {
            'test1': R.IntVector((12, 12, 15)),
            'test2': R.IntVector((32, 4, 12)),
            'test3': R.IntVector((3, 12, 26)),
        }
        testFrame = R.DataFrame(frameColumns)

        # The dict the data frame is built from is not ordered, so the columns
        # are reordered first to pin 'test2' to a fixed position. This relies
        # on the very index function under test, which may or may not be the
        # right thing to do.
        columnPositions = R.IntVector(
            [rFunctions.index(testFrame, name) for name in columnNames])
        columnOrder = R.r['with'](testFrame, R.r['order'](columnPositions))
        testFrame = testFrame.rx[True, columnOrder]

        # a lookup of an existing column is done first; its result is unused
        rFunctions.index(testFrame, 'test2')

        with self.assertRaises(KeyError):
            rFunctions.index(testFrame, 'column_notindf')
Пример #9
0
    def test_histogram(self):
        """Exercise the histogram plot with one vector and with three vectors.

        Part one plots the log10 intensities through self.plots.histogram and
        fails when 'test_histogram.png' is missing afterwards. Part two plots
        three overlapping vectors through a fresh rPlots.Plots instance and
        passes as long as no error is raised.
        """
        # ### TEST 1 HISTOGRAM ###
        histogramPng = testFolder + 'test_histogram.png'
        # remove a leftover output file before plotting
        if os.path.exists(histogramPng):
            os.remove(histogramPng)

        # read the tab separated csv file
        csvData = rFunctions.readCsvFile(
            testFolder + 'feature.csv', sep='\t', head=True, na='N/A')
        # keep only the rows with unique ids
        csvUniqID = rFunctions.getRowsWithUniqColumn(csvData, 'id')
        # locate the intensity column with the index function of rFunctions
        # NOTE(review): the column is read from csvData, not from the
        # de-duplicated csvUniqID -- confirm this is intended.
        intensityIndex = rFunctions.index(csvUniqID, 'intensity')
        intensityVector = csvData[intensityIndex]
        logIntensityVector = rFunctions.takeLog(intensityVector, 10)

        # pass every supported keyword argument to check that all are parsed
        self.plots.histogram(
            histogramPng,
            logIntensityVector,
            plotArgs={'labels': True},
            width=400,
            height=400,
            title='test #features per intensity',
            ylab='# of test features',
        )
        # the png has to exist now; clean it up when it does
        if os.path.exists(histogramPng):
            os.remove(histogramPng)
        else:
            self.fail(
                testFolder +
                'test_histogram.png does not exist. File not written out correctly'
            )

        # ### TEST 3 HISTOGRAMS ###
        # this part succeeds when plotting does not raise
        outpng = testFolder + 'testOverlapHistogram.png'
        # remove a leftover file so that the test can't pass on a stale
        # output while the method itself doesn't work
        if os.path.exists(outpng):
            os.remove(outpng)
        lowVector = R.IntVector((0, 2, 2, 3, 3, 3, 4, 4, 5))
        midVector = R.IntVector((2, 4, 4, 5, 5, 5, 6, 6, 7))
        highVector = R.IntVector((4, 6, 6, 7, 7, 7, 8, 8, 9))
        overlapPlots = rPlots.Plots()
        overlapPlots.histogram(outpng, lowVector, midVector, highVector)
        R.r['dev.off']()
        if os.path.exists(outpng):
            os.remove(outpng)
Пример #10
0
    def test_barplot(self):
        """Check that self.plots.barplot writes 'test_barplot.png'.

        The bar plot is drawn from a frequency table of the '# precursors'
        column; the test fails when the png is missing afterwards.
        """
        # read the tab separated csv file
        csvData = rFunctions.readCsvFile(testFolder+'feature_precursor.csv', head=True, sep='\t')
        # fetch the '# precursors' column (the R data frame translates '#' to
        # 'X.' and ' ' to '.')
        precursorIndex = rFunctions.index(csvData, 'X..precursors')
        precursorVector = csvData[precursorIndex]
        precursTable = R.r['table'](precursorVector)

        barplotPng = testFolder+'test_barplot.png'
        # remove a leftover output file before plotting
        if os.path.exists(barplotPng):
            os.remove(barplotPng)

        self.plots.barplot(barplotPng, precursTable)
        R.r['dev.off']()

        # the png has to exist now; clean it up when it does
        if os.path.exists(barplotPng):
            os.remove(barplotPng)
        else:
            self.fail(testFolder+'test_barplot.png does not exist. File not written out correctly')
Пример #11
0
    def test_barplot(self):
        """Check that self.plots.barplot writes 'test_barplot.png'.

        A frequency table of the '# precursors' column is plotted; a missing
        png afterwards fails the test.
        """
        # read the tab separated csv file
        csvData = rFunctions.readCsvFile(
            testFolder + 'feature_precursor.csv', head=True, sep='\t')
        # take the '# precursors' column (the R data frame translates '#' to
        # 'X.' and ' ' to '.')
        precursorColumn = rFunctions.index(csvData, 'X..precursors')
        precursorVector = csvData[precursorColumn]
        precursTable = R.r['table'](precursorVector)

        outputPng = testFolder + 'test_barplot.png'
        # remove a leftover output file before plotting
        if os.path.exists(outputPng):
            os.remove(outputPng)

        self.plots.barplot(outputPng, precursTable)
        R.r['dev.off']()

        plotWritten = os.path.exists(outputPng)
        if plotWritten:
            os.remove(outputPng)
        else:
            self.fail(
                testFolder +
                'test_barplot.png does not exist. File not written out correctly'
            )
Пример #12
0
    def test_histogram(self):
        """Exercise the histogram plot with one vector and with three vectors.

        The single-vector plot must produce 'test_histogram.png'; the
        three-vector plot only has to run without raising.
        """
        # ### TEST 1 HISTOGRAM ###
        singlePng = testFolder+'test_histogram.png'
        # remove a leftover output file before plotting
        if os.path.exists(singlePng):
            os.remove(singlePng)

        # read the tab separated csv file
        csvData = rFunctions.readCsvFile(testFolder+'feature.csv', sep='\t', head=True, na='N/A')
        # keep only the rows with unique ids
        csvUniqID = rFunctions.getRowsWithUniqColumn(csvData, 'id')
        # locate the intensity column with the index function of rFunctions
        # NOTE(review): the column is taken from csvData rather than from
        # csvUniqID -- confirm this is intended.
        intensityColumn = rFunctions.index(csvUniqID, 'intensity')
        logIntensityVector = rFunctions.takeLog(csvData[intensityColumn], 10)

        # pass every supported keyword argument to check that all are parsed
        self.plots.histogram(
            singlePng, logIntensityVector,
            plotArgs={'labels': True},
            width=400, height=400,
            title='test #features per intensity',
            ylab='# of test features')
        # the png has to exist now; clean it up when it does
        if os.path.exists(singlePng):
            os.remove(singlePng)
        else:
            self.fail(testFolder+'test_histogram.png does not exist. File not written out correctly')

        # ### TEST 3 HISTOGRAMS ###
        # this part succeeds when plotting does not raise
        outpng = testFolder+'testOverlapHistogram.png'
        # remove a leftover file so that a stale output can't hide a broken
        # method
        if os.path.exists(outpng):
            os.remove(outpng)
        firstVector = R.IntVector((0,2,2,3,3,3,4,4,5))
        secondVector = R.IntVector((2,4,4,5,5,5,6,6,7))
        thirdVector = R.IntVector((4,6,6,7,7,7,8,8,9))
        freshPlots = rPlots.Plots()
        freshPlots.histogram(outpng, firstVector, secondVector, thirdVector)
        R.r['dev.off']()
        if os.path.exists(outpng):
            os.remove(outpng)
Пример #13
0
    def test_getRowsWithUniqColumn(self):
        """Check rFunctions.getRowsWithUniqColumn on a repeated 'test1' value.

        The input frame holds 12 twice in 'test1'; the expected frame holds
        two rows. Both frames are compared column by column.
        """

        def reorderColumns(frame):
            # The dict a data frame is built from is not ordered, so the
            # columns are reordered to keep 'test2' at a fixed position.
            positions = R.IntVector([
                rFunctions.index(frame, columnName)
                for columnName in ('test1', 'test2', 'test3')
            ])
            return frame.rx[True, R.r['with'](frame, R.r['order'](positions))]

        expectedDict = {
            'test1': R.IntVector((12,15)),
            'test2': R.IntVector((32,12)),
            'test3': R.IntVector((3,26)),
        }
        expectedSubset = reorderColumns(R.DataFrame(expectedDict))

        # note that test1 has 12 in row 1 and row 2
        testDict = {
            'test1': R.IntVector((12,12,15)),
            'test2': R.IntVector((32,4,12)),
            'test3': R.IntVector((3,12,26)),
        }
        # data frame under test
        testFrame = reorderColumns(R.DataFrame(testDict))
        # only two of the three rows are expected to remain, because test1
        # had 12 twice
        actualDfSubset = rFunctions.getRowsWithUniqColumn(testFrame, 'test1')

        # Two data frames can't be compared directly, so each column is
        # reduced to [name, first value, second value].
        expectedResultList = [
            [column[0], column[1][0], column[1][1]]
            for column in expectedSubset.iteritems()
        ]
        actualResultList = [
            [column[0], column[1][0], column[1][1]]
            for column in actualDfSubset.iteritems()
        ]

        self.assertEqual(expectedResultList, actualResultList)
Пример #14
0
 def test_getColumnsException(self):
     """Check that rFunctions.getColumns raises TypeError on bad arguments.

     Covered calls: only a data frame and no column argument, an integer
     mixed in with a column name (in either position), and a plain nested
     Python list passed instead of an R data frame.
     """
     frameColumns = {
         'test1': R.IntVector((12,15)),
         'test2': R.IntVector((32,12)),
         'test3': R.IntVector((3,26)),
     }
     # data frame under test (built once; the original built it twice)
     testFrame = R.DataFrame(frameColumns)
     # The dict the data frame is built from is not ordered, so the columns
     # are reordered first to make sure that 'test2' is always at the same
     # position.
     columnPositions = R.IntVector([
         rFunctions.index(testFrame, name)
         for name in ('test1', 'test2', 'test3')
     ])
     columnOrder = R.r['with'](testFrame, R.r['order'](columnPositions))
     testFrame = testFrame.rx[True, columnOrder]

     # no column argument at all
     with self.assertRaises(TypeError):
         rFunctions.getColumns(testFrame)
     # an integer after a valid column name
     with self.assertRaises(TypeError):
         rFunctions.getColumns(testFrame, 'test1', 1)
     # an integer before a valid column name
     with self.assertRaises(TypeError):
         rFunctions.getColumns(testFrame, 1, 'test1')
     # a nested Python list in place of the data frame
     with self.assertRaises(TypeError):
         rFunctions.getColumns([[1,2,3],[3,2,2],[4,3,2]], 'test1')