def test_getsamplemetadatafromfilename(self):
    """
    Test we are parsing NPC NMR filenames correctly (PCSOP.081).

    Builds an empty NMRDataset, injects representative sample file names,
    runs the filename parser, and checks the extracted 'Rack' and 'Study'
    columns.
    """
    # Create an empty object with simple filenames
    dataset = nPYc.NMRDataset('', fileType='empty')
    dataset.sampleMetadata['Sample File Name'] = [
        'Test1_serum_Rack1_SLT_090114/101',
        'Test_serum_Rack10_SLR_090114/10',
        'Test2_serum_Rack100_DLT_090114/102',
        'Test2_urine_Rack103_MR_090114/20',
        'Test2_serum_Rack010_JTP_090114/80',
        'Test1_water_Rack10_TMP_090114/90'
    ]

    dataset._getSampleMetadataFromFilename(dataset.Attributes['filenameSpec'])

    # Rack numbers are parsed as integers, so leading zeros are dropped
    # ('Rack010' -> 10).
    rack = pandas.Series([1, 10, 100, 103, 10, 10],
                         name='Rack',
                         dtype=int)
    # pandas.util.testing is deprecated (removed in pandas 2.0);
    # use the public pandas.testing API instead.
    pandas.testing.assert_series_equal(dataset.sampleMetadata['Rack'], rack)

    study = pandas.Series(['Test1', 'Test', 'Test2', 'Test2', 'Test2', 'Test1'],
                          name='Study',
                          dtype=str)
    pandas.testing.assert_series_equal(dataset.sampleMetadata['Study'], study)
def test_exportISATAB(self):
    """
    Check the ISATAB exporter writes the assay file to the expected path.

    Builds an empty NMRDataset, attaches a hand-written sample metadata
    table, exports to a temporary directory and asserts the assay file
    exists.  File contents are not validated here.
    """
    nmrData = nPYc.NMRDataset('', fileType='empty')
    # Five synthetic acquisitions: three pooled linearity-reference runs
    # (blank subject/sampling IDs) and two study samples.
    raw_data = {
        'Acquired Time': ['09/08/2016 01:36:23', '09/08/2016 01:56:23', '09/08/2016 02:16:23',
                          '09/08/2016 02:36:23', '09/08/2016 02:56:23'],
        'AssayRole': ['AssayRole.LinearityReference', 'AssayRole.LinearityReference',
                      'AssayRole.LinearityReference', 'AssayRole.Assay', 'AssayRole.Assay'],
        'SampleType': ['SampleType.StudyPool', 'SampleType.StudyPool', 'SampleType.StudyPool',
                       'SampleType.StudySample', 'SampleType.StudySample'],
        'Subject ID': ['', '', '', 'SCANS-120', 'SCANS-130'],
        'Sampling ID': ['', '', '', 'T0-7-S', 'T0-9-S'],
        'Study': ['TestStudy', 'TestStudy', 'TestStudy', 'TestStudy', 'TestStudy'],
        'Gender': ['', '', '', 'Female', 'Male'],
        'Age': ['', '', '', '55', '66'],
        'Sampling Date': ['', '', '', '27/02/2006', '28/02/2006'],
        'Sample batch': ['', '', '', 'SB 1', 'SB 2'],
        'Acquisition batch': ['1', '2', '3', '4', '5'],
        'Run Order': ['0', '1', '2', '3', '4'],
        'Instrument': ['QTOF 2', 'QTOF 2', 'QTOF 2', 'QTOF 2', 'QTOF 2'],
        'Assay data name': ['', '', '', 'SS_LNEG_ToF02_S1W4', 'SS_LNEG_ToF02_S1W5']
    }
    # Explicit column order so the exported table layout is deterministic.
    nmrData.sampleMetadata = pandas.DataFrame(raw_data,
                                              columns=['Acquired Time', 'AssayRole', 'SampleType',
                                                       'Subject ID', 'Sampling ID', 'Study',
                                                       'Gender', 'Age', 'Sampling Date',
                                                       'Sample batch', 'Acquisition batch',
                                                       'Run Order', 'Instrument',
                                                       'Assay data name'])
    with tempfile.TemporaryDirectory() as tmpdirname:
        nmrData.exportDataset(destinationPath=tmpdirname,
                              saveFormat='ISATAB',
                              withExclusions=False)
        # NOTE(review): the assay filename embeds the study name
        # 'npc-test-study' — presumably derived by the exporter, not from
        # the 'Study' column above; confirm against exportDataset.
        a = os.path.join(tmpdirname, 'NMRDataset',
                         'a_npc-test-study_metabolite_profiling_NMR_spectroscopy.txt')
        self.assertTrue(os.path.exists(a))
def test_updateMasks_warns(self):
    """
    Check updateMasks emits a UserWarning and skips an exclusion region
    whose low and high bounds are identical.
    """
    dataset = nPYc.NMRDataset('', fileType='empty')

    with self.subTest(msg='Range low == high'):
        dataset.Attributes['exclusionRegions'] = None
        # Raw string: '\(' in a plain string literal is an invalid escape
        # sequence (DeprecationWarning since Python 3.6, SyntaxWarning later).
        self.assertWarnsRegex(UserWarning,
                              r'Low \(1\.10\) and high \(1\.10\) bounds are identical, skipping region',
                              dataset.updateMasks,
                              filterFeatures=True,
                              filterSamples=False,
                              exclusionRegions=(1.1, 1.1))
def test_load_npc_lims_masking_reruns(self):
    """
    Check that re-acquired samples are masked when LIMS info is added.

    Thirteen acquisitions of the same rack are loaded; where the same
    sample was acquired more than once only the latest acquisition should
    stay in the sample mask, and a UserWarning should be raised.
    """
    limspath = os.path.join('..', '..', 'npc-standard-project', 'Derived_Worklists',
                            'UnitTest1_NMR_urine_PCSOP.011.csv')
    dataset = nPYc.NMRDataset('', 'empty')
    dataset.sampleMetadata = pandas.DataFrame([], columns=['Sample File Name'])
    # Experiment numbers 10..90; 12/51/52/81 are reruns of earlier experiments.
    dataset.sampleMetadata['Sample File Name'] = ['UnitTest1_Urine_Rack1_SLL_270814/10',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/12',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/20',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/30',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/40',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/51',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/52',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/50',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/60',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/70',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/80',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/81',
                                                  'UnitTest1_Urine_Rack1_SLL_270814/90']
    # Minimal intensity matrix: one row per acquisition, first column 1..13.
    dataset.intensityData = numpy.zeros((13, 2))
    dataset.intensityData[:, 0] = numpy.arange(1, 14, 1)
    dataset.initialiseMasks()

    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        # warning
        dataset.addSampleInfo(descriptionFormat='NPC LIMS', filePath=limspath)
        # check
        assert issubclass(w[0].category, UserWarning)
        assert "previous acquisitions masked, latest is kept" in str(w[0].message)

    with self.subTest(msg='Masking of reruns'):
        # Superseded acquisitions (positions 0, 5, 7, 10) are masked out.
        expectedMask = numpy.array([False, True, True, True, True, False, True,
                                    False, True, True, False, True, True],
                                   dtype=bool)
        numpy.testing.assert_array_equal(dataset.sampleMask, expectedMask)
def test_updateMasks_raises(self):
    """updateMasks must raise ValueError when no exclusion regions are available."""
    nmrData = nPYc.NMRDataset('', fileType='empty')

    with self.subTest(msg='No Ranges'):
        # Neither the SOP attribute nor the keyword provides a region.
        nmrData.Attributes['exclusionRegions'] = None
        with self.assertRaises(ValueError):
            nmrData.updateMasks(filterFeatures=True,
                                filterSamples=False,
                                exclusionRegions=None)
def setUp(self):
    """Load the UnitTest3 BI-LISA worksheet used by the tests in this case."""
    sourcePath = os.path.join("..", "..", "npc-standard-project",
                              "Derived_Data", "UnitTest3_BI-LISA.xls")
    # For BI-LISA imports the pulseProgram argument carries the sheet name.
    self.testData = nPYc.NMRDataset(sourcePath,
                                    fileType='BI-LISA',
                                    pulseProgram='ICLONDON_UNITTEST3')
    # Hardcoded data size
    self.noSamp = 10
    self.noFeat = 105
def test_updateMasks_features(self):
    """
    Feature masking by ppm exclusion regions: single range, reversed
    bounds, and a list of ranges.
    """
    noSamp = 10
    featureCount = numpy.random.randint(1000, high=10000, size=None)

    dataset = nPYc.NMRDataset('', fileType='empty')
    dataset.intensityData = numpy.zeros([10, featureCount], dtype=float)
    ppm = numpy.linspace(-10, 10, featureCount)
    dataset.featureMetadata = pandas.DataFrame(ppm, columns=['ppm'])

    def keepOutside(low, high):
        # A feature survives the mask only when its ppm lies outside [low, high].
        return numpy.logical_or(ppm < low, ppm > high)

    with self.subTest(msg='Single range'):
        ranges = (-1.1, 1.2)
        dataset.initialiseMasks()
        dataset.updateMasks(filterFeatures=True, filterSamples=False,
                            exclusionRegions=ranges)
        numpy.testing.assert_array_equal(keepOutside(ranges[0], ranges[1]),
                                         dataset.featureMask)

    with self.subTest(msg='Reversed range'):
        # Bounds given high-first must be reordered internally.
        ranges = (7.1, 1.92)
        dataset.initialiseMasks()
        dataset.updateMasks(filterFeatures=True, filterSamples=False,
                            exclusionRegions=ranges)
        numpy.testing.assert_array_equal(keepOutside(ranges[1], ranges[0]),
                                         dataset.featureMask)

    with self.subTest(msg='list of ranges'):
        ranges = [(-5, -1), (1, 5)]
        dataset.initialiseMasks()
        dataset.updateMasks(filterFeatures=True, filterSamples=False,
                            exclusionRegions=ranges)
        # A feature must lie outside every excluded region to be kept.
        expectedFeatureMask = numpy.logical_and(keepOutside(ranges[0][0], ranges[0][1]),
                                                keepOutside(ranges[1][0], ranges[1][1]))
        numpy.testing.assert_array_equal(expectedFeatureMask, dataset.featureMask)
def test_init(self):
    """
    Round-trip a randomly generated NMRDataset through CSV export and
    re-import, checking intensities, key sample metadata columns, the ppm
    axis, and the dataset name all survive.
    """
    noSamp = numpy.random.randint(5, high=10, size=None)
    noFeat = numpy.random.randint(500, high=1000, size=None)
    dataset = generateTestDataset(noSamp, noFeat,
                                  dtype='NMRDataset',
                                  sop='GenericNMRurine')
    dataset.name = 'Testing'

    with tempfile.TemporaryDirectory() as tmpdirname:
        dataset.exportDataset(destinationPath=tmpdirname,
                              saveFormat='CSV',
                              withExclusions=False)
        # The CSV Export reader locates the sibling intensity/feature files
        # from the sampleMetadata path.
        pathName = os.path.join(tmpdirname, 'Testing_sampleMetadata.csv')
        rebuiltData = nPYc.NMRDataset(pathName, fileType='CSV Export')

        numpy.testing.assert_array_equal(rebuiltData.intensityData,
                                         dataset.intensityData)

        # check_dtype=False: dtypes may legitimately change across the
        # CSV round trip (e.g. enums re-read as strings/objects).
        for column in ['Sample File Name', 'SampleType', 'AssayRole',
                       'Acquired Time', 'Run Order']:
            # pandas.util.testing is deprecated (removed in pandas 2.0);
            # use the public pandas.testing API instead.
            pandas.testing.assert_series_equal(rebuiltData.sampleMetadata[column],
                                               dataset.sampleMetadata[column],
                                               check_dtype=False)

        for column in ['ppm']:
            pandas.testing.assert_series_equal(rebuiltData.featureMetadata[column],
                                               dataset.featureMetadata[column],
                                               check_dtype=False)

        self.assertEqual(rebuiltData.name, dataset.name)
def generateTestDataset(noSamp, noFeat, dtype='Dataset',
                        variableType=VariableType.Discrete, sop='Generic'):
    """
    Generate a dataset object with random sample and feature numbers, and random contents.

    .. warning:: Objects returned by this function are not expected to be fully functional!

    :param int noSamp: Number of samples
    :param int noFeat: Number of features
    :param str dtype: Concrete dataset class to build ('Dataset', 'MSDataset' or 'NMRDataset')
    :param VariableType variableType: Type of enumerations
    :param str sop: SOP name passed through to the dataset constructor
    """
    # Instantiate the requested dataset flavour; anything else is rejected.
    if dtype == 'Dataset':
        data = nPYc.Dataset(sop=sop)
    elif dtype == 'MSDataset':
        data = nPYc.MSDataset('', fileType='empty', sop=sop)
    elif dtype == 'NMRDataset':
        data = nPYc.NMRDataset('', fileType='empty', sop=sop)
    else:
        raise ValueError

    # Random strictly-positive intensities.
    data.intensityData = numpy.random.lognormal(size=(noSamp, noFeat)) + 1

    data.sampleMetadata = pandas.DataFrame(0,
                                           index=numpy.arange(noSamp),
                                           columns=['Sample File Name', 'SampleType',
                                                    'AssayRole', 'Acquired Time',
                                                    'Run Order', 'Dilution', 'Detector',
                                                    'Correction Batch'])
    # Default every sample to a plain study sample / assay acquisition.
    data.sampleMetadata['SampleType'] = nPYc.enumerations.SampleType.StudySample
    data.sampleMetadata['AssayRole'] = nPYc.enumerations.AssayRole.Assay
    data.sampleMetadata['Run Order'] = numpy.arange(noSamp)
    data.sampleMetadata['Detector'] = numpy.arange(noSamp) * 5
    data.sampleMetadata['Batch'] = 1
    # First half of the run belongs to correction batch 1, the rest to 2.
    data.sampleMetadata['Correction Batch'] = 2
    data.sampleMetadata.loc[0:int(noSamp / 2), 'Correction Batch'] = 1
    data.sampleMetadata['Exclusion Details'] = ''
    data.sampleMetadata['Sample File Name'] = [randomword(10) for x in range(0, noSamp)]
    data.sampleMetadata['Dilution'] = numpy.random.rand(noSamp)

    # Random categorical 'Classes' column with 2-4 levels and random class weights.
    noClasses = numpy.random.randint(2, 5)
    classNames = [str(i) for i in range(0, noClasses)]
    classProbabilties = numpy.random.rand(noClasses)
    classProbabilties = classProbabilties / sum(classProbabilties)
    data.sampleMetadata['Classes'] = numpy.random.choice(classNames,
                                                         size=noSamp,
                                                         p=classProbabilties)

    # Acquisition timestamps at 15-minute intervals starting from now.
    data.sampleMetadata['Acquired Time'] = [d for d in datetime_range(datetime.now(),
                                                                      noSamp,
                                                                      timedelta(minutes=15))]
    data.sampleMetadata['Acquired Time'] = data.sampleMetadata['Acquired Time'].astype(datetime)

    # Every 10th sample becomes a study-pool precision reference; the sample
    # 5 positions after each becomes an external reference.
    # NOTE(review): column positions 1 and 2 are assumed to be 'SampleType'
    # and 'AssayRole' (matches the columns list above).
    data.sampleMetadata.iloc[::10, 1] = nPYc.enumerations.SampleType.StudyPool
    data.sampleMetadata.iloc[::10, 2] = nPYc.enumerations.AssayRole.PrecisionReference
    data.sampleMetadata.iloc[5::10, 1] = nPYc.enumerations.SampleType.ExternalReference
    data.sampleMetadata.iloc[5::10, 2] = nPYc.enumerations.AssayRole.PrecisionReference

    if dtype == 'MSDataset' or dtype == 'Dataset':
        # MS-style feature axes: m/z, retention time, names — plus a ppm
        # column (presumably for code paths shared with NMR — verify).
        data.featureMetadata = pandas.DataFrame(0,
                                                index=numpy.arange(noFeat),
                                                columns=['m/z'])
        data.featureMetadata['m/z'] = numpy.linspace(50, 800, noFeat)
        data.featureMetadata['Retention Time'] = (720 - 50) * numpy.random.rand(noFeat) + 50
        data.featureMetadata['Feature Name'] = [randomword(10) for x in range(0, noFeat)]
        data.featureMetadata['ppm'] = numpy.linspace(10, -1, noFeat)
    elif dtype == 'NMRDataset':
        # NMR feature axis is just the ppm scale, 10 down to -1.
        data.featureMetadata = pandas.DataFrame(numpy.linspace(10, -1, noFeat),
                                                columns=('ppm', ),
                                                dtype=float)

    data.VariableType = variableType
    data.initialiseMasks()

    return data
def generateTestDataset(noSamp, noFeat, dtype='Dataset',
                        variableType=VariableType.Discrete, sop='Generic'):
    """
    Generate a dataset object with random sample and feature numbers, and random contents.

    .. warning:: Objects returned by this function are not expected to be fully functional!

    :param int noSamp: Number of samples
    :param int noFeat: Number of features
    :param str dtype: Concrete dataset class to build ('Dataset', 'MSDataset',
        'NMRDataset' or 'TargetedDataset')
    :param VariableType variableType: Type of enumerations
    :param str sop: SOP name passed through to the dataset constructor
    """
    # Instantiate the requested dataset flavour; anything else is rejected.
    if dtype == 'Dataset':
        data = nPYc.Dataset(sop=sop)
    elif dtype == 'MSDataset':
        data = nPYc.MSDataset('', fileType='empty', sop=sop)
    elif dtype == 'NMRDataset':
        data = nPYc.NMRDataset('', fileType='empty', sop=sop)
    elif dtype == 'TargetedDataset':
        data = nPYc.TargetedDataset('', fileType='empty', sop=sop)
    else:
        raise ValueError

    # Random strictly-positive intensities.
    data.intensityData = numpy.random.lognormal(size=(noSamp, noFeat)) + 1

    data.sampleMetadata = pandas.DataFrame(0,
                                           index=numpy.arange(noSamp),
                                           columns=['Sample File Name', 'SampleType',
                                                    'AssayRole', 'Acquired Time',
                                                    'Run Order', 'Dilution', 'Detector',
                                                    'Correction Batch'])
    # Default every sample to a plain study sample / assay acquisition.
    data.sampleMetadata['SampleType'] = nPYc.enumerations.SampleType.StudySample
    data.sampleMetadata['AssayRole'] = nPYc.enumerations.AssayRole.Assay
    data.sampleMetadata['Run Order'] = numpy.arange(noSamp, dtype='int64')
    data.sampleMetadata['Detector'] = numpy.arange(noSamp) * 5
    data.sampleMetadata['Batch'] = 1
    # First half of the run belongs to correction batch 1, the rest to 2.
    data.sampleMetadata['Correction Batch'] = 2
    data.sampleMetadata.loc[0:int(noSamp / 2), 'Correction Batch'] = 1
    data.sampleMetadata['Exclusion Details'] = ''
    data.sampleMetadata['Sample File Name'] = [randomword(10) for x in range(0, noSamp)]
    data.sampleMetadata['Sample ID'] = [randomword(10) for x in range(0, noSamp)]
    data.sampleMetadata['Dilution'] = numpy.random.rand(noSamp)

    # Random categorical 'Classes' column with 2-4 levels and random class weights.
    noClasses = numpy.random.randint(2, 5)
    classNames = [str(i) for i in range(0, noClasses)]
    classProbabilties = numpy.random.rand(noClasses)
    classProbabilties = classProbabilties / sum(classProbabilties)
    data.sampleMetadata['Classes'] = numpy.random.choice(classNames,
                                                         size=noSamp,
                                                         p=classProbabilties)

    # Acquisition timestamps at 15-minute intervals starting from now.
    data.sampleMetadata['Acquired Time'] = [d for d in datetime_range(datetime.now(),
                                                                      noSamp,
                                                                      timedelta(minutes=15))]
    # Ensure seconds are not recorded, otherwise it's impossible to compare
    # datasets read from disk with datasets generated on the fly.
    data.sampleMetadata['Acquired Time'] = [datetime.strptime(d.strftime("%Y-%m-%d %H:%M"),
                                                              "%Y-%m-%d %H:%M")
                                            for d in data.sampleMetadata['Acquired Time']]
    data.sampleMetadata['Acquired Time'] = data.sampleMetadata['Acquired Time'].dt.to_pydatetime()

    # Every 10th sample becomes a study-pool precision reference; the sample
    # 5 positions after each becomes an external reference.
    # NOTE(review): column positions 1 and 2 are assumed to be 'SampleType'
    # and 'AssayRole' (matches the columns list above).
    data.sampleMetadata.iloc[::10, 1] = nPYc.enumerations.SampleType.StudyPool
    data.sampleMetadata.iloc[::10, 2] = nPYc.enumerations.AssayRole.PrecisionReference
    data.sampleMetadata.iloc[5::10, 1] = nPYc.enumerations.SampleType.ExternalReference
    data.sampleMetadata.iloc[5::10, 2] = nPYc.enumerations.AssayRole.PrecisionReference

    if dtype == 'MSDataset':
        # MS feature axes plus the QC filter/statistic columns the MS
        # pipeline expects (filters pass by default, statistics unset).
        data.featureMetadata = pandas.DataFrame(0,
                                                index=numpy.arange(noFeat),
                                                columns=['m/z'])
        data.featureMetadata['m/z'] = (800 - 40) * numpy.random.rand(noFeat) + 40
        data.featureMetadata['Retention Time'] = (720 - 50) * numpy.random.rand(noFeat) + 50
        data.featureMetadata['Feature Name'] = [randomword(10) for x in range(0, noFeat)]
        data.featureMetadata['Exclusion Details'] = None
        data.featureMetadata['User Excluded'] = False
        data.featureMetadata[['rsdFilter', 'varianceRatioFilter',
                              'correlationToDilutionFilter', 'blankFilter',
                              'artifactualFilter']] = pandas.DataFrame([[True, True, True, True, True]],
                                                                       index=data.featureMetadata.index)
        data.featureMetadata[['rsdSP', 'rsdSS/rsdSP', 'correlationToDilution', 'blankValue']] \
            = pandas.DataFrame([[numpy.nan, numpy.nan, numpy.nan, numpy.nan]],
                               index=data.featureMetadata.index)
        data.Attributes['Feature Names'] = 'Feature Name'
    elif dtype == 'Dataset':
        # Generic dataset: MS-style axes without the QC columns.
        data.featureMetadata = pandas.DataFrame(0,
                                                index=numpy.arange(noFeat),
                                                columns=['m/z'])
        data.featureMetadata['m/z'] = (800 - 40) * numpy.random.rand(noFeat) + 40
        data.featureMetadata['Retention Time'] = (720 - 50) * numpy.random.rand(noFeat) + 50
        data.featureMetadata['Feature Name'] = [randomword(10) for x in range(0, noFeat)]
        data.Attributes['Feature Names'] = 'Feature Name'
    elif dtype == 'NMRDataset':
        # NMR feature axis is the ppm scale (10 down to -1); per-sample NMR
        # QC flags default to passing.
        data.featureMetadata = pandas.DataFrame(numpy.linspace(10, -1, noFeat),
                                                columns=('ppm', ),
                                                dtype=float)
        data.featureMetadata['Feature Name'] = data.featureMetadata['ppm'].astype(str)
        data.sampleMetadata['Delta PPM'] = numpy.random.rand(noSamp)
        data.sampleMetadata['Line Width (Hz)'] = numpy.random.rand(noSamp)
        data.sampleMetadata['CalibrationFail'] = False
        data.sampleMetadata['LineWidthFail'] = False
        data.sampleMetadata['WaterPeakFail'] = False
        data.sampleMetadata['BaselineFail'] = False
        data.Attributes['Feature Names'] = 'ppm'

    data.VariableType = variableType
    data.initialiseMasks()

    return data
def test_exportISATAB(self):
    """
    Export a hand-built dataset to ISA-Tab and run the isatab validator
    over the resulting investigation file, expecting zero errors.
    """
    nmrData = nPYc.NMRDataset('', fileType='empty')
    # Five synthetic acquisitions: three pooled linearity-reference runs
    # (blank subject/sampling IDs) and two study samples.  Sample types are
    # carried in the 'Status' column here (older 'SampleType' kept below
    # for reference).
    raw_data = {
        'Acquired Time': [
            '2016-08-09 01:36:23', '2016-08-09 01:56:23', '2016-08-09 02:16:23',
            '2016-08-09 02:36:23', '2016-08-09 02:56:23'
        ],
        'AssayRole': [
            'AssayRole.LinearityReference', 'AssayRole.LinearityReference',
            'AssayRole.LinearityReference', 'AssayRole.Assay', 'AssayRole.Assay'
        ],
        #'SampleType': ['SampleType.StudyPool', 'SampleType.StudyPool', 'SampleType.StudyPool','SampleType.StudySample', 'SampleType.StudySample'],
        'Status': [
            'SampleType.StudyPool', 'SampleType.StudyPool', 'SampleType.StudyPool',
            'SampleType.StudySample', 'SampleType.StudySample'
        ],
        'Subject ID': ['', '', '', 'SCANS-120', 'SCANS-130'],
        'Sampling ID': ['', '', '', 'T0-7-S', 'T0-9-S'],
        'Sample File Name': ['sfn1', 'sfn2', 'sfn3', 'sfn4', 'sfn5'],
        'Study': ['TestStudy', 'TestStudy', 'TestStudy', 'TestStudy', 'TestStudy'],
        'Gender': ['', '', '', 'Female', 'Male'],
        'Age': ['', '', '', '55', '66'],
        'Sampling Date': ['', '', '', '27/02/2006', '28/02/2006'],
        'Sample batch': ['', '', '', 'SB 1', 'SB 2'],
        'Batch': ['1', '2', '3', '4', '5'],
        'Run Order': ['0', '1', '2', '3', '4'],
        'Instrument': ['QTOF 2', 'QTOF 2', 'QTOF 2', 'QTOF 2', 'QTOF 2'],
        'Assay data name': ['', '', '', 'SS_LNEG_ToF02_S1W4', 'SS_LNEG_ToF02_S1W5']
    }
    # Explicit column order so the exported table layout is deterministic.
    nmrData.sampleMetadata = pandas.DataFrame(
        raw_data,
        columns=[
            'Acquired Time', 'AssayRole', 'Status', 'Subject ID', 'Sampling ID',
            'Study', 'Gender', 'Age', 'Sampling Date', 'Sample batch', 'Batch',
            'Run Order', 'Instrument', 'Assay data name', 'Sample File Name'
        ])

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Investigation/study/assay descriptors required by the ISA-Tab writer.
        details = {
            'investigation_identifier': "i1",
            'investigation_title': "Give it a title",
            'investigation_description': "Add a description",
            'investigation_submission_date': "2016-11-03",  # use today if not specified
            'investigation_public_release_date': "2016-11-03",
            'first_name': "Noureddin",
            'last_name': "Sadawi",
            'affiliation': "University",
            'study_filename': "my_nmr_study",
            'study_material_type': "Serum",
            'study_identifier': "s1",
            'study_title': "Give the study a title",
            'study_description': "Add study description",
            'study_submission_date': "2016-11-03",
            'study_public_release_date': "2016-11-03",
            'assay_filename': "my_nmr_assay"
        }
        nmrData.initialiseMasks()
        nmrData.exportDataset(destinationPath=tmpdirname,
                              isaDetailsDict=details,
                              saveFormat='ISATAB')
        investigatio_file = os.path.join(tmpdirname, 'i_investigation.txt')
        numerrors = 0
        with open(investigatio_file) as fp:
            # isatab.validate returns a report dict; only the error count
            # is checked here (warnings are ignored).
            report = isatab.validate(fp)
            numerrors = len(report['errors'])
        #self.assertTrue(os.path.exists(a))
        self.assertEqual(
            numerrors, 0,
            msg="ISATAB Validator found {} errors in the ISA-Tab archive".format(numerrors))
def test_addSampleInfo_npclims(self):
    """
    Check NPC LIMS sample information is merged correctly for both a urine
    (UnitTest1) and a serum (UnitTest3) acquisition set: 'Sampling ID',
    'Status', 'AssayRole' and 'SampleType' must match the LIMS worklist.
    """
    with self.subTest(msg='Urine dataset (UnitTest1).'):
        dataPath = os.path.join('..', '..', 'npc-standard-project', 'Raw_Data',
                                'nmr', 'UnitTest1')
        limsFilePath = os.path.join('..', '..', 'npc-standard-project',
                                    'Derived_Worklists',
                                    'UnitTest1_NMR_urine_PCSOP.011.csv')
        # Suppress import warnings from reading the raw unit-test spectra.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            dataset = nPYc.NMRDataset(dataPath,
                                      pulseProgram='noesygppr1d',
                                      sop='GenericNMRurine')

        # Sort rows (and the intensity matrix to match) so the expected
        # lists below line up with a deterministic sample order.
        dataset.sampleMetadata.sort_values('Sample File Name', inplace=True)
        sortIndex = dataset.sampleMetadata.index.values
        dataset.intensityData = dataset.intensityData[sortIndex, :]
        dataset.sampleMetadata = dataset.sampleMetadata.reset_index(drop=True)

        expected = copy.deepcopy(dataset.sampleMetadata)

        dataset.addSampleInfo(descriptionFormat='NPC LIMS', filePath=limsFilePath)

        testSeries = ['Sampling ID', 'Status', 'AssayRole', 'SampleType']
        expected['Sampling ID'] = [
            'UT1_S2_u1', 'UT1_S3_u1', 'UT1_S4_u1', 'UT1_S4_u2', 'UT1_S4_u3',
            'UT1_S4_u4', 'External Reference Sample', 'Study Pool Sample'
        ]
        expected['Status'] = [
            'Sample', 'Sample', 'Sample', 'Sample', 'Sample', 'Sample',
            'Long Term Reference', 'Study Reference'
        ]
        expected['AssayRole'] = [
            AssayRole.Assay, AssayRole.Assay, AssayRole.Assay, AssayRole.Assay,
            AssayRole.Assay, AssayRole.Assay, AssayRole.PrecisionReference,
            AssayRole.PrecisionReference
        ]
        expected['SampleType'] = [
            SampleType.StudySample, SampleType.StudySample, SampleType.StudySample,
            SampleType.StudySample, SampleType.StudySample, SampleType.StudySample,
            SampleType.ExternalReference, SampleType.StudyPool
        ]
        for series in testSeries:
            with self.subTest(msg='Testing %s' % series):
                pandas.util.testing.assert_series_equal(
                    dataset.sampleMetadata[series], expected[series])

    with self.subTest(msg='Serum dataset (UnitTest3).'):
        dataPath = os.path.join('..', '..', 'npc-standard-project', 'Raw_Data',
                                'nmr', 'UnitTest3')
        limsFilePath = os.path.join('..', '..', 'npc-standard-project',
                                    'Derived_Worklists',
                                    'UnitTest3_NMR_serum_PCSOP.012.csv')
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            dataset = nPYc.NMRDataset(
                dataPath, pulseProgram='cpmgpr1d',
                sop='GenericNMRurine'
            )  # Use blood sop to avoid calibration of empty spectra

        dataset.sampleMetadata.sort_values('Sample File Name', inplace=True)
        sortIndex = dataset.sampleMetadata.index.values
        dataset.intensityData = dataset.intensityData[sortIndex, :]
        dataset.sampleMetadata = dataset.sampleMetadata.reset_index(drop=True)

        expected = copy.deepcopy(dataset.sampleMetadata)

        dataset.addSampleInfo(descriptionFormat='NPC LIMS', filePath=limsFilePath)

        testSeries = ['Sampling ID', 'Status', 'AssayRole', 'SampleType']
        expected['Sampling ID'] = [
            'UT3_S7', 'UT3_S8', 'UT3_S6', 'UT3_S5', 'UT3_S4', 'UT3_S3',
            'UT3_S2', 'External Reference Sample', 'Study Pool Sample', 'UT3_S1'
        ]
        # NOTE(review): the final sample has no LIMS status, hence the
        # literal string 'nan' rather than a missing value.
        expected['Status'] = [
            'Sample', 'Sample', 'Sample', 'Sample', 'Sample', 'Sample',
            'Sample', 'Long Term Reference', 'Study Reference', 'nan'
        ]
        expected['AssayRole'] = [
            AssayRole.Assay, AssayRole.Assay, AssayRole.Assay, AssayRole.Assay,
            AssayRole.Assay, AssayRole.Assay, AssayRole.Assay,
            AssayRole.PrecisionReference, AssayRole.PrecisionReference,
            AssayRole.Assay
        ]
        expected['SampleType'] = [
            SampleType.StudySample, SampleType.StudySample, SampleType.StudySample,
            SampleType.StudySample, SampleType.StudySample, SampleType.StudySample,
            SampleType.StudySample, SampleType.ExternalReference,
            SampleType.StudyPool, SampleType.StudySample
        ]
        for series in testSeries:
            with self.subTest(msg='Testing %s' % series):
                pandas.util.testing.assert_series_equal(
                    dataset.sampleMetadata[series], expected[series])
def test_updateMasks_samples(self):
    """
    Check sample masking by SampleType/AssayRole: the default keeps
    precision references and assay samples, and explicit sampleTypes /
    assayRoles select exactly the requested combinations.
    """
    from nPYc.enumerations import VariableType, DatasetLevel, AssayRole, SampleType

    dataset = nPYc.NMRDataset('', fileType='empty')
    dataset.intensityData = numpy.zeros([18, 5], dtype=float)

    # 18 samples: 5 linearity references, 6 precision references, 5 assay
    # samples, then one external reference and one method reference.
    dataset.sampleMetadata['AssayRole'] = pandas.Series(
        [AssayRole.LinearityReference, AssayRole.LinearityReference,
         AssayRole.LinearityReference, AssayRole.LinearityReference,
         AssayRole.LinearityReference, AssayRole.PrecisionReference,
         AssayRole.PrecisionReference, AssayRole.PrecisionReference,
         AssayRole.PrecisionReference, AssayRole.PrecisionReference,
         AssayRole.PrecisionReference, AssayRole.Assay, AssayRole.Assay,
         AssayRole.Assay, AssayRole.Assay, AssayRole.Assay,
         AssayRole.PrecisionReference, AssayRole.PrecisionReference],
        name='AssayRole',
        dtype=object)
    dataset.sampleMetadata['SampleType'] = pandas.Series(
        [SampleType.StudyPool, SampleType.StudyPool, SampleType.StudyPool,
         SampleType.StudyPool, SampleType.StudyPool, SampleType.StudyPool,
         SampleType.StudyPool, SampleType.StudyPool, SampleType.StudyPool,
         SampleType.StudyPool, SampleType.StudyPool, SampleType.StudySample,
         SampleType.StudySample, SampleType.StudySample, SampleType.StudySample,
         SampleType.StudySample, SampleType.ExternalReference,
         SampleType.MethodReference],
        name='SampleType',
        dtype=object)

    with self.subTest(msg='Default Parameters'):
        # Defaults drop linearity references, external and method references.
        expectedSampleMask = numpy.array([False, False, False, False, False,
                                          True, True, True, True, True, True,
                                          True, True, True, True, True,
                                          False, False], dtype=bool)
        dataset.initialiseMasks()
        dataset.updateMasks(filterFeatures=False)
        numpy.testing.assert_array_equal(expectedSampleMask, dataset.sampleMask)

    with self.subTest(msg='Export SP and ER'):
        # Only study-pool and external-reference precision references survive.
        expectedSampleMask = numpy.array([False, False, False, False, False,
                                          True, True, True, True, True, True,
                                          False, False, False, False, False,
                                          True, False], dtype=bool)
        dataset.initialiseMasks()
        dataset.updateMasks(filterFeatures=False,
                            sampleTypes=[SampleType.StudyPool,
                                         SampleType.ExternalReference],
                            assayRoles=[AssayRole.PrecisionReference])
        numpy.testing.assert_array_equal(expectedSampleMask, dataset.sampleMask)

    with self.subTest(msg='Export Dilution Samples only'):
        # Only the study-pool linearity-reference (dilution) samples survive.
        expectedSampleMask = numpy.array([True, True, True, True, True,
                                          False, False, False, False, False,
                                          False, False, False, False, False,
                                          False, False, False], dtype=bool)
        dataset.initialiseMasks()
        dataset.updateMasks(filterFeatures=False,
                            sampleTypes=[SampleType.StudyPool],
                            assayRoles=[AssayRole.LinearityReference])
        numpy.testing.assert_array_equal(expectedSampleMask, dataset.sampleMask)
def test_reports(self):
    from nPYc.enumerations import AssayRole
    from nPYc.enumerations import SampleType
    from datetime import datetime
    """
    Validate generate feature summary report
    at the moment all it will test is if the plots and reports are saved, not checking contents
    """
    # empty object
    testData = nPYc.NMRDataset('', fileType='empty')

    # Need to hardcode attributes for testing purposes only, rather than
    # read them in from the SOP; some are normally generated by the code.
    testData.Attributes['WP_highRegionTo'] = 5.0
    testData.Attributes['WP_lowRegionFrom'] = 4.6000000000000005
    testData.Attributes['BL_lowRegionFrom'] = -1.0
    testData.Attributes['BL_highRegionTo'] = 10.0
    # Non-matching alignTo so the report does not execute the plotting code
    # path; future code-coverage work could let it use the SOP default.
    testData.Attributes['alignTo'] = 'xxxx'

    # Per-sample baseline (BL) and water-peak (WP) QC flags — all passing.
    testData.sampleMetadata['BL_low_outliersFailArea'] = [False, False, False, False]
    testData.sampleMetadata['BL_low_outliersFailNeg'] = [False, False, False, False]
    testData.sampleMetadata['BL_high_outliersFailArea'] = [False, False, False, False]
    testData.sampleMetadata['BL_high_outliersFailNeg'] = [False, False, False, False]
    testData.sampleMetadata['WP_low_outliersFailArea'] = [False, False, False, False]
    testData.sampleMetadata['WP_low_outliersFailNeg'] = [False, False, False, False]
    testData.sampleMetadata['WP_high_outliersFailArea'] = [False, False, False, False]
    testData.sampleMetadata['WP_high_outliersFailNeg'] = [False, False, False, False]
    testData.sampleMetadata['Rack'] = ['Rack1', 'Rack1', 'Rack1', 'Rack1']
    testData.sampleMetadata['Study'] = ['unitTest', 'unitTest', 'unitTest', 'unitTest']
    testData.sampleMetadata['AssayRole'] = pandas.Series(
        [AssayRole.Assay, AssayRole.Assay, AssayRole.Assay, AssayRole.Assay, ],
        name='AssayRole',
        dtype=object)
    testData.sampleMetadata['SampleType'] = pandas.Series(
        [SampleType.StudySample, SampleType.StudySample,
         SampleType.StudySample, SampleType.StudySample, ],
        name='SampleType',
        dtype=object)
    testData.sampleMetadata['Acquired Time'] = [datetime(2012, 12, 1), datetime(2012, 12, 2),
                                                datetime(2012, 12, 3), datetime(2012, 12, 4)]
    testData.sampleMetadata['Exclusion Details'] = ['None', 'None', 'None', 'None']
    testData.sampleMetadata['ImportFail'] = [False, False, False, False]
    testData.sampleMetadata['exceed90critical'] = [False, False, False, False]
    testData.sampleMetadata['calibrPass'] = [True, True, True, True]
    # Representative QC statistics (line width, BL/WP fail percentages).
    testData.sampleMetadata['Line Width (Hz)'] = [0.818454, 1.060146, 0.876968, 0.876968]
    testData.sampleMetadata['BL_low_failArea'] = [0.220022, 0.000000, 1.210121, 1.210121]
    testData.sampleMetadata['BL_low_failNeg'] = [0.0, 0.0, 0.0, 0.0]
    testData.sampleMetadata['BL_high_failArea'] = [7.929515, 6.387665, 11.563877, 11.563877]
    testData.sampleMetadata['BL_high_failNeg'] = [0.0, 0.0, 0.0, 0.0]
    testData.sampleMetadata['WP_low_failArea'] = [1.657459, 0.000000, 1.210121, 1.210121]
    testData.sampleMetadata['WP_low_failNeg'] = [0.0, 0.0, 28.176796, 28.176796]
    testData.sampleMetadata['WP_high_failArea'] = [19.889503, 13.812155, 53.038674, 53.038674]
    testData.sampleMetadata['WP_high_failNeg'] = [0.0, 0.0, 0.0, 0.0]
    testData.sampleMetadata['Status'] = ['Sample', 'Sample', 'Sample', 'Sample']
    testData.sampleMetadata['path'] = ['UNITTEST01_test/UNITTEST01_Plasma_Rack39_RCM_101214/10',
                                       'UNITTEST01_test/UNITTEST01_Plasma_Rack39_RCM_101214/20',
                                       'UNITTEST01_test/UNITTEST01_Plasma_Rack39_RCM_101214/30',
                                       'UNITTEST01_Plasma_Rack39_RCM_101214/40']
    testData.sampleMetadata['Sample File Name'] = ['UNITTEST01_Plasma_Rack39_RCM_101214/10',
                                                   'UNITTEST01_Plasma_Rack39_RCM_101214/20',
                                                   'UNITTEST01_Plasma_Rack39_RCM_101214/30',
                                                   'UNITTEST01_Plasma_Rack39_RCM_101214/40']
    testData.sampleMetadata['Sample Base Name'] = ['UNITTEST01_Plasma_Rack39_RCM_101214/10',
                                                   'UNITTEST01_Plasma_Rack39_RCM_101214/20',
                                                   'UNITTEST01_Plasma_Rack39_RCM_101214/30',
                                                   'UNITTEST01_Plasma_Rack39_RCM_101214/40']

    noFeat = 2000
    # One sample excluded via the mask; all features kept.
    testData.sampleMask = numpy.array([False, True, False, False], dtype=bool)
    testData.featureMask = numpy.ones(noFeat, dtype=bool)
    testData.intensityData = numpy.random.randn(4, noFeat)
    testData.featureMetadata = pandas.DataFrame(numpy.linspace(10, -1, noFeat),
                                                columns=('ppm',),
                                                dtype=float)

    # create a temporary directory using the context manager
    with tempfile.TemporaryDirectory() as tmpdirname:
        # run the code for feature summary
        _generateReportNMR(testData, 'feature summary', output=tmpdirname)
        # Only the existence of each figure/report file is asserted.
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_featureSummary',
                                           'NMRDataset_calibrationCheck.png')) == 1
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_featureSummary',
                                           'NMRDataset_finalFeatureBLWPplots1.png')) == 1
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_featureSummary',
                                           'NMRDataset_finalFeatureBLWPplots3.png')) == 1
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_featureSummary',
                                           'NMRDataset_finalFeatureIntensityHist.png')) == 1
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_featureSummary',
                                           'NMRDataset_peakWidthBoxplot.png')) == 1
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_featureSummary',
                                           'npc-main.css')) == 1
        assert os.path.exists(os.path.join(tmpdirname,
                                           'NMRDataset_report_featureSummary.html')) == 1

    # test final report using same data
    with tempfile.TemporaryDirectory() as tmpdirname:
        # run the code for the final report
        _generateReportNMR(testData, 'final report', output=tmpdirname,
                           withExclusions=False)
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_finalReport',
                                           'NMRDataset_finalFeatureBLWPplots1.png')) == 1
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_finalReport',
                                           'NMRDataset_finalFeatureBLWPplots3.png')) == 1
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_finalReport',
                                           'NMRDataset_finalFeatureIntensityHist.png')) == 1
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_finalReport',
                                           'NMRDataset_peakWidthBoxplot.png')) == 1
        assert os.path.exists(os.path.join(tmpdirname, 'graphics', 'report_finalReport',
                                           'npc-main.css')) == 1
        assert os.path.exists(os.path.join(tmpdirname,
                                           'NMRDataset_report_finalReport.html')) == 1