def test_returns_file_position_to_begining(self):
    """Reading the header must leave the stream rewound to its start."""
    content = "{},score,count\n".format(constants.hgvs_nt_column).encode()
    stream = BytesIO(content)
    read_header_from_io(stream)
    # A full read after the call must still yield every byte of the input.
    self.assertEqual(stream.read(), content)
def test_does_not_raise_valuerror_2_or_more_values_in_column(self):
    """Headers with one or two columns besides the hgvs column validate."""
    for template in ("{},score,count\n", "{},score\n"):
        raw = template.format(constants.hgvs_nt_column).encode()
        columns = read_header_from_io(BytesIO(raw))
        validate_at_least_one_additional_column(columns)  # Should pass
def test_does_not_raise_valuerror_when_either_hgvs_in_column(self):
    """A header led by either the nt or the pro hgvs column validates."""
    hgvs_columns = (constants.hgvs_nt_column, constants.hgvs_pro_column)
    for hgvs_column in hgvs_columns:
        raw = "{},score,count\n".format(hgvs_column).encode()
        columns = read_header_from_io(BytesIO(raw))
        validate_has_hgvs_in_header(columns)  # Should pass
def _validate_dataset_header(file):
    """Read the header from ``file`` and run the header-level validators."""
    header = dataset_validators.read_header_from_io(file=file)
    dataset_validators.validate_has_hgvs_in_header(header=header)
    dataset_validators.validate_at_least_one_additional_column(header=header)
    dataset_validators.validate_header_contains_no_null_columns(header=header)


def validate_dataset(countfile=None, scorefile=None, scorejson=None):
    """
    Run the dataset validators on whichever inputs are supplied.

    This function calls the validation functions within
    mavetools/mavetools/validators/dataset_validation.py

    Parameters
    ----------
    countfile : optional
        Anything accepted by ``open()`` that locates the count data.
        Skipped when ``None``.
    scorefile : optional
        Anything accepted by ``open()`` that locates the score data.
        Skipped when ``None``.
    scorejson : optional
        Anything accepted by ``open()`` that locates a JSON document.
        Skipped when ``None``.

    Returns
    -------
    None
        Problems are reported by the exceptions the individual validators
        raise.
    """
    import json

    # TODO: how to incorporate word limit validator?

    if scorefile is not None:
        # Keep the handle open only for the validators that read from it;
        # the context manager guarantees it is closed afterwards.
        with open(scorefile) as score_handle:
            _validate_dataset_header(score_handle)
            # NOTE(review): assumes this validator takes the same file-like
            # object as read_header_from_io (same ``file=`` keyword) - confirm.
            dataset_validators.validate_scoreset_score_data_input(
                file=score_handle)

    if scorejson is not None:
        with open(scorejson) as json_handle:
            score_dict = json.load(json_handle)
        # NOTE(review): the ``dict_`` keyword suggests the validator expects
        # the parsed mapping rather than the path - confirm.
        dataset_validators.validate_scoreset_json(dict_=score_dict)

    if countfile is not None:
        with open(countfile) as count_handle:
            _validate_dataset_header(count_handle)
            # NOTE(review): same assumption as for the score file above.
            dataset_validators.validate_scoreset_count_data_input(
                file=count_handle)

    if scorefile is not None and countfile is not None:
        # NOTE(review): arguments are passed through exactly as the caller
        # supplied them; confirm what form this validator expects.
        dataset_validators.validate_datasets_define_same_variants(
            scores=scorefile, counts=countfile)
def test_raises_valuerror_when_null_values_in_column(self):
    """Every entry of ``constants.null_values_list`` used as a column name
    must make header validation raise ``ValueError``."""
    for value in constants.null_values_list:
        # One sub-test per value: a failure names the offending value and
        # the remaining values are still exercised.
        with self.subTest(value=value):
            file = BytesIO(
                "{},score,{}\n".format(
                    constants.hgvs_nt_column, value).encode())
            with self.assertRaises(ValueError):
                header = read_header_from_io(file)
                validate_header_contains_no_null_columns(header)
def test_does_not_raise_valuerror_when_non_null_values_in_column(self):
    """A header made only of ordinary column names passes the null check."""
    raw = "{},score\n".format(constants.hgvs_nt_column).encode()
    columns = read_header_from_io(BytesIO(raw))
    validate_header_contains_no_null_columns(columns)  # Should pass
def test_strips_whitespace(self):
    """Whitespace padding around column names is removed from the header."""
    padded = " {} , score , count\n".format(constants.hgvs_nt_column)
    parsed = read_header_from_io(StringIO(padded))
    self.assertEqual([constants.hgvs_nt_column, "score", "count"], parsed)
def test_removes_quotes_from_header(self):
    """Quoted column names are unquoted; a quoted comma stays in the name."""
    quoted = '"{}","score","count,nt"\n'.format(constants.hgvs_nt_column)
    parsed = read_header_from_io(BytesIO(quoted.encode()))
    self.assertEqual([constants.hgvs_nt_column, "score", "count,nt"], parsed)
def test_can_read_header_from_bytes(self):
    """A header supplied as bytes is returned as a list of column names."""
    raw = "{},score,count\n".format(constants.hgvs_nt_column).encode()
    parsed = read_header_from_io(BytesIO(raw))
    self.assertEqual([constants.hgvs_nt_column, "score", "count"], parsed)
def test_hgvs_must_be_lowercase(self):
    """An upper-cased hgvs column name is rejected with ``ValueError``."""
    upper_name = constants.hgvs_nt_column.upper()
    stream = BytesIO("{},score,count\n".format(upper_name).encode())
    # Reading the header stays inside the context, as in the other tests.
    with self.assertRaises(ValueError):
        validate_has_hgvs_in_header(read_header_from_io(stream))
def test_raises_valuerror_when_neither_hgvs_col_in_column(self):
    """A header with no hgvs column at all is rejected with ``ValueError``."""
    stream = BytesIO("score,count\n".encode())
    with self.assertRaises(ValueError):
        validate_has_hgvs_in_header(read_header_from_io(stream))
def test_raises_valuerror_when_less_than_2_values_in_column(self):
    """A header holding only the hgvs column is rejected with ``ValueError``."""
    raw = "{}\n".format(constants.hgvs_nt_column).encode()
    stream = BytesIO(raw)
    with self.assertRaises(ValueError):
        validate_at_least_one_additional_column(read_header_from_io(stream))