def test_index(self):
    """Check rFunctions.index on a data frame and on a matrix.

    Both containers hold the same three named columns; the column 'test2'
    is expected at index 1 in each of them.
    """
    wantedIndex = 1
    columnNames = ['test1', 'test2', 'test3']

    # Data frame under test. Note that test1 has 12 in row 1 and row 2.
    frameColumns = {
        'test1': R.IntVector((12, 12, 15)),
        'test2': R.IntVector((32, 4, 12)),
        'test3': R.IntVector((3, 12, 26)),
    }
    frame = R.DataFrame(frameColumns)

    # The dict the data frame is built from is not ordered, so the columns
    # are reordered first to make sure 'test2' always ends up at the same
    # position. This relies on the very index function that is being
    # tested, which might or might not be the right thing to do.
    positions = R.IntVector(
        [rFunctions.index(frame, name) for name in columnNames])
    columnOrder = R.r['with'](frame, R.r['order'](positions))
    frame = frame.rx[True, columnOrder]
    frameIndex = rFunctions.index(frame, 'test2')

    # Matrix under test (same values as the data frame).
    matrix = R.r.matrix(
        R.IntVector([12, 12, 15, 32, 4, 12, 3, 12, 26]), nrow=3)
    matrix.colnames = R.StrVector(columnNames)
    matrixIndex = rFunctions.index(matrix, 'test2')

    self.assertEqual(wantedIndex, frameIndex)
    self.assertEqual(wantedIndex, matrixIndex)
def test_getColumnsException(self):
    """Check that rFunctions.getColumns raises TypeError on bad arguments.

    Four invalid calls are exercised: no column names at all, a non-string
    second column argument, a non-string first column argument, and a
    plain nested Python list passed in place of the data frame.
    """
    # Named 'columns' rather than 'dict' so the builtin is not shadowed.
    columns = {
        'test1': R.IntVector((12, 15)),
        'test2': R.IntVector((32, 12)),
        'test3': R.IntVector((3, 26)),
    }
    # Data frame under test (built once; the original built it twice).
    testFrame = R.DataFrame(columns)

    # The dict the data frame is built from is not ordered, so the columns
    # are reordered first to make sure 'test2' is always at the same
    # position.
    positions = R.IntVector([
        rFunctions.index(testFrame, 'test1'),
        rFunctions.index(testFrame, 'test2'),
        rFunctions.index(testFrame, 'test3'),
    ])
    columnOrder = R.r['with'](testFrame, R.r['order'](positions))
    testFrame = testFrame.rx[True, columnOrder]

    # No column names given.
    self.assertRaises(TypeError, rFunctions.getColumns, testFrame)
    # An int where the second column name is expected.
    self.assertRaises(TypeError, rFunctions.getColumns, testFrame,
                      'test1', 1)
    # An int where the first column name is expected.
    self.assertRaises(TypeError, rFunctions.getColumns, testFrame,
                      1, 'test1')
    # A nested Python list instead of an R data frame.
    self.assertRaises(TypeError, rFunctions.getColumns,
                      [[1, 2, 3], [3, 2, 2], [4, 3, 2]], 'test1')
def test_index(self):
    """Check rFunctions.index on a data frame and on a matrix.

    Both containers hold the same three named columns; the column 'test2'
    is expected at index 1 in each of them.
    """
    wantedIndex = 1
    columnNames = ['test1', 'test2', 'test3']

    # Data frame under test. Note that test1 has 12 in row 1 and row 2.
    frameColumns = {
        'test1': R.IntVector((12, 12, 15)),
        'test2': R.IntVector((32, 4, 12)),
        'test3': R.IntVector((3, 12, 26)),
    }
    frame = R.DataFrame(frameColumns)

    # The dict the data frame is built from is not ordered, so the columns
    # are reordered first to make sure 'test2' always ends up at the same
    # position. This relies on the very index function that is being
    # tested, which might or might not be the right thing to do.
    positions = R.IntVector(
        [rFunctions.index(frame, name) for name in columnNames])
    columnOrder = R.r['with'](frame, R.r['order'](positions))
    frame = frame.rx[True, columnOrder]
    frameIndex = rFunctions.index(frame, 'test2')

    # Matrix under test (same values as the data frame).
    matrix = R.r.matrix(
        R.IntVector([12, 12, 15, 32, 4, 12, 3, 12, 26]), nrow=3)
    matrix.colnames = R.StrVector(columnNames)
    matrixIndex = rFunctions.index(matrix, 'test2')

    self.assertEqual(wantedIndex, frameIndex)
    self.assertEqual(wantedIndex, matrixIndex)
def test_indexException(self):
    """Check that rFunctions.index raises KeyError for an unknown column."""
    # Named 'columns' rather than 'dict' so the builtin is not shadowed.
    # Note that test1 has 12 in row 1 and row 2.
    columns = {
        'test1': R.IntVector((12, 12, 15)),
        'test2': R.IntVector((32, 4, 12)),
        'test3': R.IntVector((3, 12, 26)),
    }
    testFrame = R.DataFrame(columns)

    # The dict the data frame is built from is not ordered, so the columns
    # are reordered first to make sure 'test2' is always at the same
    # position. This relies on the very index function that is being
    # tested, which might or might not be the right thing to do.
    positions = R.IntVector([
        rFunctions.index(testFrame, 'test1'),
        rFunctions.index(testFrame, 'test2'),
        rFunctions.index(testFrame, 'test3'),
    ])
    columnOrder = R.r['with'](testFrame, R.r['order'](positions))
    testFrame = testFrame.rx[True, columnOrder]

    # The original also looked up 'test2' into an unused local; that dead
    # lookup is dropped because it plays no part in the exception check.
    self.assertRaises(KeyError, rFunctions.index, testFrame,
                      'column_notindf')
def test_boxplotFormulae(self):
    """Check that self.plots.boxplotFormulae writes its output png.

    The test only verifies that the file exists after plotting; the image
    content is not inspected. The file is removed again afterwards.
    """
    plotPath = testFolder + 'boxplotFormulae.png'

    # If the output file already exists, remove it so a stale file cannot
    # make the test pass.
    if os.path.exists(plotPath):
        os.remove(plotPath)

    features = rFunctions.readCsvFile(testFolder + 'feature.csv')
    uniqueFeatures = rFunctions.getRowsWithUniqColumn(features, 'id')
    precursorsPerFeature = rFunctions.readCsvFile(
        testFolder + 'feature_precursor.csv', head=True, sep='\t')
    merged = R.r['merge'](uniqueFeatures, precursorsPerFeature)

    # Replace the merged intensity column by the rounded log10 of the
    # intensities taken from the unique-feature data frame.
    logIntensity = rFunctions.takeLog(
        uniqueFeatures[rFunctions.index(uniqueFeatures, 'intensity')], 10)
    merged[rFunctions.index(merged, 'intensity')] = \
        R.r['round'](logIntensity)

    precursorCounts = merged[rFunctions.index(merged, 'X..precursors')]
    roundedIntensity = merged[rFunctions.index(merged, 'intensity')]
    self.plots.boxplotFormulae(
        plotPath,
        precursorCounts,
        roundedIntensity,
        merged,
        title='MS/MS per feature per intensity',
        ylab='# of MS/MS per feature',
        xlab='Rounded log10 of intensity')
    R.r['dev.off']()

    # If after this the output does not exist, fail the test.
    if not os.path.exists(plotPath):
        self.fail(
            testFolder +
            'boxplotFormulae.png does not exist. File not written out correctly'
        )

    # Remove the plot.
    if os.path.exists(plotPath):
        os.remove(plotPath)
def test_getColumns(self):
    """Check that rFunctions.getColumns returns the requested columns.

    A three-column data frame is reduced to 'test1' and 'test3'; the
    result is compared, column by column, with a data frame that was built
    from those two columns only.
    """
    # Expected subset: the same as testDict, but without the test2 vector.
    expectedDict = {
        'test1': R.IntVector((12, 12, 15)),
        'test3': R.IntVector((3, 12, 26)),
    }
    expectedSubset = R.DataFrame(expectedDict)

    testDict = {
        'test1': R.IntVector((12, 12, 15)),
        'test2': R.IntVector((32, 4, 12)),
        'test3': R.IntVector((3, 12, 26)),
    }
    # Data frame under test.
    testFrame = R.DataFrame(testDict)

    # The dict the data frame is built from is not ordered, so the columns
    # are reordered first to make sure 'test2' is always at the same
    # position.
    positions = R.IntVector([
        rFunctions.index(testFrame, 'test1'),
        rFunctions.index(testFrame, 'test2'),
        rFunctions.index(testFrame, 'test3'),
    ])
    columnOrder = R.r['with'](testFrame, R.r['order'](positions))
    testFrame = testFrame.rx[True, columnOrder]

    actualDfSubset = rFunctions.getColumns(testFrame, 'test1', 'test3')

    # Two data frames can't be compared directly, so compare their contents:
    # one [name, value, value, ...] list per column. Every value of each
    # column is included (the original only compared the first two of the
    # three rows, so the last row was never checked).
    expectedResultList = [[values[0]] + list(values[1])
                          for values in expectedSubset.iteritems()]
    actualResultList = [[values[0]] + list(values[1])
                        for values in actualDfSubset.iteritems()]

    self.assertEqual(expectedResultList, actualResultList)
def test_boxplotFormulae(self):
    """Check that self.plots.boxplotFormulae writes its output png.

    The test only verifies that the file exists after plotting; the image
    content is not inspected. The file is removed again afterwards.
    """
    plotPath = testFolder + 'boxplotFormulae.png'

    # If the output file already exists, remove it so a stale file cannot
    # make the test pass.
    if os.path.exists(plotPath):
        os.remove(plotPath)

    features = rFunctions.readCsvFile(testFolder + 'feature.csv')
    uniqueFeatures = rFunctions.getRowsWithUniqColumn(features, 'id')
    precursorsPerFeature = rFunctions.readCsvFile(
        testFolder + 'feature_precursor.csv', head=True, sep='\t')
    merged = R.r['merge'](uniqueFeatures, precursorsPerFeature)

    # Replace the merged intensity column by the rounded log10 of the
    # intensities taken from the unique-feature data frame.
    logIntensity = rFunctions.takeLog(
        uniqueFeatures[rFunctions.index(uniqueFeatures, 'intensity')], 10)
    merged[rFunctions.index(merged, 'intensity')] = \
        R.r['round'](logIntensity)

    precursorCounts = merged[rFunctions.index(merged, 'X..precursors')]
    roundedIntensity = merged[rFunctions.index(merged, 'intensity')]
    self.plots.boxplotFormulae(
        plotPath,
        precursorCounts,
        roundedIntensity,
        merged,
        title='MS/MS per feature per intensity',
        ylab='# of MS/MS per feature',
        xlab='Rounded log10 of intensity')
    R.r['dev.off']()

    # If after this the output does not exist, fail the test.
    if not os.path.exists(plotPath):
        self.fail(
            testFolder +
            'boxplotFormulae.png does not exist. File not written out correctly'
        )

    # Remove the plot.
    if os.path.exists(plotPath):
        os.remove(plotPath)
def test_indexException(self):
    """Check that rFunctions.index raises KeyError for an unknown column."""
    # Named 'columns' rather than 'dict' so the builtin is not shadowed.
    # Note that test1 has 12 in row 1 and row 2.
    columns = {
        'test1': R.IntVector((12, 12, 15)),
        'test2': R.IntVector((32, 4, 12)),
        'test3': R.IntVector((3, 12, 26)),
    }
    testFrame = R.DataFrame(columns)

    # The dict the data frame is built from is not ordered, so the columns
    # are reordered first to make sure 'test2' is always at the same
    # position. This relies on the very index function that is being
    # tested, which might or might not be the right thing to do.
    positions = R.IntVector([
        rFunctions.index(testFrame, 'test1'),
        rFunctions.index(testFrame, 'test2'),
        rFunctions.index(testFrame, 'test3'),
    ])
    columnOrder = R.r['with'](testFrame, R.r['order'](positions))
    testFrame = testFrame.rx[True, columnOrder]

    # The original also looked up 'test2' into an unused local; that dead
    # lookup is dropped because it plays no part in the exception check.
    self.assertRaises(KeyError, rFunctions.index, testFrame,
                      'column_notindf')
def test_histogram(self):
    """Exercise the histogram plot with one vector and with three vectors.

    Part one plots a single vector with all optional keyword arguments and
    requires the output png to exist afterwards. Part two plots three
    vectors into one png and passes as long as no error is raised.
    """
    ### TEST 1 HISTOGRAM ###
    singlePath = testFolder + 'test_histogram.png'

    # If the output file already exists, remove it.
    if os.path.exists(singlePath):
        os.remove(singlePath)

    # Read a tab separated csv file.
    csvData = rFunctions.readCsvFile(
        testFolder + 'feature.csv', sep='\t', head=True, na='N/A')
    # Keep only the rows with unique ids.
    csvUniqID = rFunctions.getRowsWithUniqColumn(csvData, 'id')
    # Get the intensity column using the index function from rFunctions.
    # NOTE(review): the column position is looked up in csvUniqID but the
    # values are read from csvData -- confirm this is intended.
    intensityColumn = rFunctions.index(csvUniqID, 'intensity')
    logIntensityVector = rFunctions.takeLog(csvData[intensityColumn], 10)

    # Pass every optional keyword argument to check that they are all
    # accepted.
    self.plots.histogram(
        singlePath,
        logIntensityVector,
        plotArgs={'labels': True},
        width=400,
        height=400,
        title='test #features per intensity',
        ylab='# of test features')

    # If after this the output does not exist, fail the test; otherwise
    # clean the file up again.
    if os.path.exists(singlePath):
        os.remove(singlePath)
    else:
        self.fail(
            testFolder +
            'test_histogram.png does not exist. File not written out correctly'
        )

    ### TEST 3 HISTOGRAMS ###
    # If this part doesn't give an error it succeeded.
    outpng = testFolder + 'testOverlapHistogram.png'

    # Remove a pre-existing file up front, to make sure the test isn't
    # passing when the method doesn't work but the file already exists.
    if os.path.exists(outpng):
        os.remove(outpng)

    overlapVectors = [
        R.IntVector((0, 2, 2, 3, 3, 3, 4, 4, 5)),
        R.IntVector((2, 4, 4, 5, 5, 5, 6, 6, 7)),
        R.IntVector((4, 6, 6, 7, 7, 7, 8, 8, 9)),
    ]
    plots = rPlots.Plots()
    plots.histogram(outpng, *overlapVectors)
    R.r['dev.off']()

    if os.path.exists(outpng):
        os.remove(outpng)
def test_barplot(self):
    """Check that self.plots.barplot writes its output png.

    The test only verifies that the file exists after plotting; the file
    is removed again when it does.
    """
    plotPath = testFolder + 'test_barplot.png'

    # Read a tab separated csv file.
    csvData = rFunctions.readCsvFile(
        testFolder + 'feature_precursor.csv', head=True, sep='\t')
    # Get the '# precursors' column (the R data frame translates '#' to
    # 'X.' and ' ' to '.') and tabulate it with R's table function.
    precursorColumn = rFunctions.index(csvData, 'X..precursors')
    precursTable = R.r['table'](csvData[precursorColumn])

    # If the output file already exists, remove it.
    if os.path.exists(plotPath):
        os.remove(plotPath)

    self.plots.barplot(plotPath, precursTable)
    R.r['dev.off']()

    # The plot must exist now; clean it up when it does.
    if os.path.exists(plotPath):
        os.remove(plotPath)
    else:
        self.fail(
            testFolder +
            'test_barplot.png does not exist. File not written out correctly'
        )
def test_barplot(self):
    """Check that self.plots.barplot writes its output png.

    The test only verifies that the file exists after plotting; the file
    is removed again when it does.
    """
    plotPath = testFolder + 'test_barplot.png'

    # Read a tab separated csv file.
    csvData = rFunctions.readCsvFile(
        testFolder + 'feature_precursor.csv', head=True, sep='\t')
    # Get the '# precursors' column (the R data frame translates '#' to
    # 'X.' and ' ' to '.') and tabulate it with R's table function.
    precursorColumn = rFunctions.index(csvData, 'X..precursors')
    precursTable = R.r['table'](csvData[precursorColumn])

    # If the output file already exists, remove it.
    if os.path.exists(plotPath):
        os.remove(plotPath)

    self.plots.barplot(plotPath, precursTable)
    R.r['dev.off']()

    # The plot must exist now; clean it up when it does.
    if os.path.exists(plotPath):
        os.remove(plotPath)
    else:
        self.fail(
            testFolder +
            'test_barplot.png does not exist. File not written out correctly'
        )
def test_histogram(self):
    """Exercise the histogram plot with one vector and with three vectors.

    Part one plots a single vector with all optional keyword arguments and
    requires the output png to exist afterwards. Part two plots three
    vectors into one png and passes as long as no error is raised.
    """
    ### TEST 1 HISTOGRAM ###
    singlePath = testFolder + 'test_histogram.png'

    # If the output file already exists, remove it.
    if os.path.exists(singlePath):
        os.remove(singlePath)

    # Read a tab separated csv file.
    csvData = rFunctions.readCsvFile(
        testFolder + 'feature.csv', sep='\t', head=True, na='N/A')
    # Keep only the rows with unique ids.
    csvUniqID = rFunctions.getRowsWithUniqColumn(csvData, 'id')
    # Get the intensity column using the index function from rFunctions.
    # NOTE(review): the column position is looked up in csvUniqID but the
    # values are read from csvData -- confirm this is intended.
    intensityColumn = rFunctions.index(csvUniqID, 'intensity')
    logIntensityVector = rFunctions.takeLog(csvData[intensityColumn], 10)

    # Pass every optional keyword argument to check that they are all
    # accepted.
    self.plots.histogram(
        singlePath,
        logIntensityVector,
        plotArgs={'labels': True},
        width=400,
        height=400,
        title='test #features per intensity',
        ylab='# of test features')

    # If after this the output does not exist, fail the test; otherwise
    # clean the file up again.
    if os.path.exists(singlePath):
        os.remove(singlePath)
    else:
        self.fail(
            testFolder +
            'test_histogram.png does not exist. File not written out correctly'
        )

    ### TEST 3 HISTOGRAMS ###
    # If this part doesn't give an error it succeeded.
    outpng = testFolder + 'testOverlapHistogram.png'

    # Remove a pre-existing file up front, to make sure the test isn't
    # passing when the method doesn't work but the file already exists.
    if os.path.exists(outpng):
        os.remove(outpng)

    overlapVectors = [
        R.IntVector((0, 2, 2, 3, 3, 3, 4, 4, 5)),
        R.IntVector((2, 4, 4, 5, 5, 5, 6, 6, 7)),
        R.IntVector((4, 6, 6, 7, 7, 7, 8, 8, 9)),
    ]
    plots = rPlots.Plots()
    plots.histogram(outpng, *overlapVectors)
    R.r['dev.off']()

    if os.path.exists(outpng):
        os.remove(outpng)
def test_getRowsWithUniqColumn(self):
    """Check that rFunctions.getRowsWithUniqColumn drops duplicate rows.

    The input frame has the value 12 twice in column 'test1'; the expected
    frame holds two rows, one per distinct 'test1' value (for 12 it is the
    row that comes first in the input). Whole columns are compared, so a
    result with a different number of rows fails the test.
    """
    expectedDict = {
        'test1': R.IntVector((12, 15)),
        'test2': R.IntVector((32, 12)),
        'test3': R.IntVector((3, 26)),
    }
    expectedSubset = R.DataFrame(expectedDict)
    # The dict the data frame is built from is not ordered, so the columns
    # are reordered first to make sure 'test2' is always at the same
    # position.
    expectedPositions = R.IntVector([
        rFunctions.index(expectedSubset, 'test1'),
        rFunctions.index(expectedSubset, 'test2'),
        rFunctions.index(expectedSubset, 'test3'),
    ])
    expectedOrder = R.r['with'](expectedSubset,
                                R.r['order'](expectedPositions))
    expectedSubset = expectedSubset.rx[True, expectedOrder]

    # Note that test1 has 12 in row 1 and row 2.
    testDict = {
        'test1': R.IntVector((12, 12, 15)),
        'test2': R.IntVector((32, 4, 12)),
        'test3': R.IntVector((3, 12, 26)),
    }
    # Data frame under test, with its columns reordered the same way.
    testFrame = R.DataFrame(testDict)
    testPositions = R.IntVector([
        rFunctions.index(testFrame, 'test1'),
        rFunctions.index(testFrame, 'test2'),
        rFunctions.index(testFrame, 'test3'),
    ])
    testOrder = R.r['with'](testFrame, R.r['order'](testPositions))
    testFrame = testFrame.rx[True, testOrder]

    # Only two of the three rows should remain, because test1 had 12 twice.
    actualDfSubset = rFunctions.getRowsWithUniqColumn(testFrame, 'test1')

    # Two data frames can't be compared directly, so compare their contents:
    # one [name, value, value, ...] list per column. All values of each
    # column are included so that the number of remaining rows is checked
    # too (the original only looked at the first two values).
    expectedResultList = [[values[0]] + list(values[1])
                          for values in expectedSubset.iteritems()]
    actualResultList = [[values[0]] + list(values[1])
                        for values in actualDfSubset.iteritems()]

    self.assertEqual(expectedResultList, actualResultList)
def test_getColumnsException(self):
    """Check that rFunctions.getColumns raises TypeError on bad arguments.

    Four invalid calls are exercised: no column names at all, a non-string
    second column argument, a non-string first column argument, and a
    plain nested Python list passed in place of the data frame.
    """
    # Named 'columns' rather than 'dict' so the builtin is not shadowed.
    columns = {
        'test1': R.IntVector((12, 15)),
        'test2': R.IntVector((32, 12)),
        'test3': R.IntVector((3, 26)),
    }
    # Data frame under test (built once; the original built it twice).
    testFrame = R.DataFrame(columns)

    # The dict the data frame is built from is not ordered, so the columns
    # are reordered first to make sure 'test2' is always at the same
    # position.
    positions = R.IntVector([
        rFunctions.index(testFrame, 'test1'),
        rFunctions.index(testFrame, 'test2'),
        rFunctions.index(testFrame, 'test3'),
    ])
    columnOrder = R.r['with'](testFrame, R.r['order'](positions))
    testFrame = testFrame.rx[True, columnOrder]

    # No column names given.
    self.assertRaises(TypeError, rFunctions.getColumns, testFrame)
    # An int where the second column name is expected.
    self.assertRaises(TypeError, rFunctions.getColumns, testFrame,
                      'test1', 1)
    # An int where the first column name is expected.
    self.assertRaises(TypeError, rFunctions.getColumns, testFrame,
                      1, 'test1')
    # A nested Python list instead of an R data frame.
    self.assertRaises(TypeError, rFunctions.getColumns,
                      [[1, 2, 3], [3, 2, 2], [4, 3, 2]], 'test1')