def test_metadata_correct_case2(self):
        '''
        The metadata extracted from an integer matrix should agree with
        what we get by parsing the same file by hand: the observations
        come from the header row and the features from the first column.
        The parent operation entry is expected to be None.
        '''
        m = IntegerMatrix()
        resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
        metadata = m.extract_metadata(resource_path)

        # Parse the test file to ensure we extracted the right content.
        # Read it in a single pass inside a context manager so the file
        # handle is closed deterministically (the handle was previously
        # opened twice and never closed).
        with open(resource_path) as fin:
            header = fin.readline()
            # Every line after the header is a data row; its first
            # tab-delimited field is the row (gene) name.
            gene_list = [row.split('\t')[0] for row in fin]

        # The first header field labels the row-name column, so skip it.
        samplenames = header.strip().split('\t')[1:]
        obs_list = [Observation(x) for x in samplenames]
        feature_list = [Feature(x) for x in gene_list]

        obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
        feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data

        self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
        self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
Exemplo n.º 2
0
 def test_fails_with_float_table(self):
     '''
     An IntegerMatrix must reject 'test_matrix.tsv'.  Judging by the
     test name, that file holds floating point values, which are not
     acceptable for an integer-only matrix.
     '''
     table_path = os.path.join(TESTDIR, 'test_matrix.tsv')
     matrix = IntegerMatrix()
     # Only the validity flag matters here; the message is not checked.
     is_valid, _ = matrix.validate_type(table_path)
     self.assertFalse(is_valid)
Exemplo n.º 3
0
 def test_excel_parses_correctly(self):
     '''
     An Excel workbook validates as an integer matrix as long as
     the data lives in the first sheet.
     '''
     workbook_path = os.path.join(TESTDIR, 'test_integer_matrix.xlsx')
     matrix = IntegerMatrix()
     # The error message is irrelevant for this check.
     is_valid, _ = matrix.validate_type(workbook_path)
     self.assertTrue(is_valid)
Exemplo n.º 4
0
 def test_table_without_rownames(self):
     '''
     A table lacking row names is rejected, and the reported error
     is NUMBERED_ROW_NAMES_ERROR.
     '''
     table_path = os.path.join(
         TESTDIR, 'test_integer_matrix.no_rownames.tsv')
     matrix = IntegerMatrix()
     is_valid, message = matrix.validate_type(table_path)
     self.assertFalse(is_valid)
     self.assertEqual(message, NUMBERED_ROW_NAMES_ERROR)
Exemplo n.º 5
0
 def test_reads_table_without_gene_label(self):
     '''
     A blank name for the first column is acceptable: the table
     validates and no error message is returned.
     '''
     table_path = os.path.join(
         TESTDIR, 'test_integer_matrix.no_gene_label.tsv')
     matrix = IntegerMatrix()
     is_valid, message = matrix.validate_type(table_path)
     self.assertTrue(is_valid)
     self.assertIsNone(message)
Exemplo n.º 6
0
 def test_table_without_header(self):
     '''
     A table with no header row is rejected, and the reported error
     is NUMBERED_COLUMN_NAMES_ERROR.
     '''
     table_path = os.path.join(
         TESTDIR, 'test_integer_matrix.no_header.tsv')
     matrix = IntegerMatrix()
     is_valid, message = matrix.validate_type(table_path)
     self.assertFalse(is_valid)
     self.assertEqual(message, NUMBERED_COLUMN_NAMES_ERROR)
Exemplo n.º 7
0
 def test_excel_fails_if_not_in_first_sheet(self):
     '''
     Only the first sheet of a workbook is considered.  When the
     data sits on some other sheet, the table appears empty and
     validation fails with EMPTY_TABLE_ERROR.  (If the first sheet
     held unrelated data there would be nothing we could do to
     correct for that.)
     '''
     workbook_path = os.path.join(
         TESTDIR, 'test_integer_matrix.second_sheet.xlsx')
     matrix = IntegerMatrix()
     is_valid, message = matrix.validate_type(workbook_path)
     self.assertFalse(is_valid)
     self.assertEqual(message, EMPTY_TABLE_ERROR)
Exemplo n.º 8
0
 def test_fails_parsing_int_table_with_na(self):
     '''
     A missing (NaN) value alone is normally tolerated; see
     TestIntegerMatrix.test_reads_int_table_with_na.  Here the same
     column also holds a non-integer, so the special-case handling
     for NaN must not let the table through.
     '''
     table_path = os.path.join(
         TESTDIR, 'test_integer_matrix.with_na_and_float.csv')
     matrix = IntegerMatrix()
     # Only the validity flag is checked, not the message.
     is_valid, _ = matrix.validate_type(table_path)
     self.assertFalse(is_valid)
Exemplo n.º 9
0
 def test_reads_int_table_with_na(self):
     '''
     An integer table with missing data still validates.  This needs
     special handling because a NaN forces its column to be parsed
     as float even when every other value in it is an integer.
     '''
     table_path = os.path.join(
         TESTDIR, 'test_integer_matrix.with_na.csv')
     matrix = IntegerMatrix()
     is_valid, message = matrix.validate_type(table_path)
     self.assertTrue(is_valid)
     self.assertIsNone(message)
Exemplo n.º 10
0
    def test_fails_if_filetype_incorrect_case2(self):
        '''
        A CSV file carrying a TSV extension fails validation.

        Without inspecting the table we cannot tell that the wrong
        extension was the cause, so only the failure itself is
        asserted here, not a specific error message.
        '''
        mislabeled_path = os.path.join(
            TESTDIR, 'test_csv_integer_matrix_labeled_as_tsv.tsv')
        matrix = IntegerMatrix()
        is_valid, _ = matrix.validate_type(mislabeled_path)
        self.assertFalse(is_valid)
Exemplo n.º 11
0
 def test_fails_parsing_int_table_with_na_and_float(self):
     '''
     A missing (NaN) value alone is normally tolerated; see
     TestIntegerMatrix.test_reads_int_table_with_na.  In this file a
     non-integer is placed in a different column (5), and the error
     message is expected to name exactly that column.
     '''
     table_path = os.path.join(
         TESTDIR, 'test_integer_matrix.with_multiple_na_and_float.csv')
     matrix = IntegerMatrix()
     is_valid, message = matrix.validate_type(table_path)
     self.assertFalse(is_valid)
     # The message should call out the offending column by name/position.
     expected_message = NON_INTEGER_ERROR.format(
         cols='SW5_Treated (column 5)')
     self.assertEqual(message, expected_message)
Exemplo n.º 12
0
    def test_metadata_correct_case1(self):
        '''
        Typically, the metadata is collected following a successful
        validation.  Do that here: validate the TSV file, extract its
        metadata, and check that the observations match the sample
        names in the header row.  The feature set and parent operation
        entries are both expected to be None.
        '''
        m = IntegerMatrix()
        resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
        is_valid, err = m.validate_type(resource_path, 'tsv')
        self.assertTrue(is_valid)
        self.assertIsNone(err)

        # OK, the validation worked.  Get metadata
        metadata = m.extract_metadata(resource_path, 'tsv')

        # Parse the header of the test file to ensure we extracted the
        # right content.  The context manager guarantees the handle is
        # closed (it was previously left open).
        with open(resource_path) as fin:
            header = fin.readline()

        # The first header field labels the row-name column, so skip it.
        samplenames = header.strip().split('\t')[1:]
        obs_list = [Observation(x) for x in samplenames]
        obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data

        self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
        # Feature metadata is no longer stored, as it was causing database
        # issues due to the size of the json object, so we expect None
        # rather than a serialized feature set.
        self.assertIsNone(metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
Exemplo n.º 13
0
    def test_reads_integer_table(self):
        '''
        Integer-only tables validate without an error message, both
        in the TSV and the CSV variants of the test file.
        '''
        table_names = ('test_integer_matrix.tsv', 'test_integer_matrix.csv')
        for table_name in table_names:
            # Use a fresh instance for each file.
            matrix = IntegerMatrix()
            is_valid, message = matrix.validate_type(
                os.path.join(TESTDIR, table_name))
            self.assertTrue(is_valid)
            self.assertIsNone(message)
Exemplo n.º 14
0
 def test_duplicate_rownames_fails(self):
     '''
     A table whose row names are not unique is rejected, and the
     reported error is NONUNIQUE_ROW_NAMES_ERROR.
     '''
     table_path = os.path.join(
         TESTDIR, 'test_matrix.duplicate_rownames.tsv')
     matrix = IntegerMatrix()
     is_valid, message = matrix.validate_type(table_path)
     self.assertFalse(is_valid)
     self.assertEqual(message, NONUNIQUE_ROW_NAMES_ERROR)