def test_validate_good_file_extension(self):
    # Both the gzipped TSV name and the alternative gzipped CSV name are
    # expected to pass the extension check for "pgs-upload".
    for filename in ("test_file.tsv.gz", "test_file.csv.gz"):
        path = os.path.join(self.test_storepath, filename)
        checker = v.Validator(path, "pgs-upload", logfile=path + ".LOG")
        self.assertTrue(checker.validate_file_extension())
def test_validate_bad_file_extension(self):
    # A ".zip" file name is expected to fail the extension check.
    path = os.path.join(self.test_storepath, "test_file.zip")
    log_path = path + ".LOG"
    checker = v.Validator(path, "gwas-upload", logfile=log_path)
    self.assertFalse(checker.validate_file_extension())
def test_validate_good_file_headers(self):
    # The unmodified fixture produced by prep.SSTestFile is expected to have
    # headers that validate for "pgs-upload".
    path = os.path.join(self.test_storepath, "test_file.tsv")
    prep.SSTestFile().prep_test_file()
    checker = v.Validator(path, "pgs-upload", logfile=path + ".LOG")
    self.assertTrue(checker.validate_headers())
def test_validate_good_file_data(self):
    # The unmodified fixture produced by prep.SSTestFile is expected to pass
    # validate_data() for "pgs-upload".
    path = os.path.join(self.test_storepath, "test_file.tsv")
    # Swap only the extension for ".LOG". str.replace('tsv', 'LOG') would also
    # rewrite any "tsv" that happens to occur in the directory part of the
    # path and point the log at a different (possibly missing) directory.
    log_path = os.path.splitext(path)[0] + ".LOG"
    prep.SSTestFile().prep_test_file()
    checker = v.Validator(path, "pgs-upload", logfile=log_path)
    self.assertTrue(checker.validate_data())
def validate_file(self):
    """Run the 'gwas-upload' validation steps on ``self.store_path`` in order.

    The steps are: file extension, headers, squareness, row count, data.
    The first step that fails stops the run.

    Returns:
        True when every step passes. False when a step fails or when any
        exception is raised while the steps run (the exception is logged,
        not propagated).

    Side effects:
        Calls ``self.set_logfile()`` and sets ``self.validation_error``:
        6 (extension), 7 (headers), 8 (squareness), 9 (row count),
        3 (data failure; also the value left in place when an exception
        is caught, because it is assigned before any step runs).
    """
    self.set_logfile()
    self.validation_error = 3

    # minrows is only forwarded when truthy, so the Validator's own default
    # applies otherwise.
    options = {
        'file': self.store_path,
        'filetype': 'gwas-upload',
        'error_limit': 1,
        'logfile': self.logfile,
    }
    if self.minrows:
        options['minrows'] = self.minrows
    validator = val.Validator(**options)

    # (progress message, validator method name, failure log lines, error code)
    # Method names are resolved lazily inside the loop so that a step is only
    # touched once every earlier step has passed.
    steps = (
        ("Validating file extension...", "validate_file_extension",
         ("VALIDATION FAILED",), 6),
        ("Validating headers...", "validate_headers",
         ("Invalid headers...exiting before any further checks",
          "VALIDATION FAILED"), 7),
        ("Validating file for squareness...", "validate_file_squareness",
         ("Rows are malformed..exiting before any further checks",), 8),
        ("Validating rows...", "validate_rows",
         ("File contains too few rows..exiting before any further checks",), 9),
        ("Validating data...", "validate_data",
         ("VALIDATION FAILED",), 3),
    )

    try:
        for progress_msg, method_name, failure_msgs, error_code in steps:
            logger.info(progress_msg)
            if getattr(validator, method_name)():
                continue
            for failure_msg in failure_msgs:
                logger.info(failure_msg)
            self.validation_error = error_code
            return False
        logger.info("VALIDATION SUCCESSFUL")
        return True
    except Exception as e:
        logger.error(e)
        logger.info("VALIDATION FAILED")
        return False
def test_validate_bad_file_headers(self):
    # Header validation is expected to fail once the EFFECT_DSET column is
    # dropped from the fixture.
    path = os.path.join(self.test_storepath, "test_file.tsv")
    fixture = prep.SSTestFile()
    fixture.set_test_data_dict()
    fixture.test_data_dict.pop(EFFECT_DSET)  # remove a mandatory field
    fixture.prep_test_file()
    checker = v.Validator(path, "pgs-upload", logfile=path + ".LOG")
    self.assertFalse(checker.validate_headers())
def test_validate_file_headers_missing_snp(self):
    # Header validation is expected to still pass when the SNP_DSET column
    # is removed from the fixture.
    path = os.path.join(self.test_storepath, "test_file.tsv")
    fixture = prep.SSTestFile()
    fixture.set_test_data_dict()
    fixture.test_data_dict.pop(SNP_DSET)  # remove a snp field
    fixture.prep_test_file()
    checker = v.Validator(path, "pgs-upload", logfile=path + ".LOG")
    self.assertTrue(checker.validate_headers())
def test_validate_bad_file_headers_missing_effect(self):
    # With effect_weight, odds ratio and hazard ratio columns all removed,
    # header validation is expected to fail.
    path = os.path.join(self.test_storepath, "test_file.tsv")
    fixture = prep.SSTestFile()
    fixture.set_test_data_dict()
    for dset in (EFFECT_WEIGHT_DSET, OR_DSET, HR_DSET):
        fixture.test_data_dict.pop(dset)
    fixture.prep_test_file()
    checker = v.Validator(path, "pgs-upload", logfile=path + ".LOG")
    self.assertFalse(checker.validate_headers())
def test_validate_empty_snp_file_data(self):
    # Four rows with "NA"/missing SNP values: all four rows are expected to
    # be reported as bad and validate_data() to fail.
    test_filename = "empty_snp.tsv"
    test_filepath = os.path.join(self.test_storepath, test_filename)
    # Swap only the extension for ".LOG". str.replace('tsv', 'LOG') would also
    # rewrite any "tsv" that happens to occur in the directory part of the
    # path.
    logfile = os.path.splitext(test_filepath)[0] + ".LOG"
    setup_file = prep.SSTestFile(filename=test_filename)
    setup_file.set_test_data_dict()
    setup_file.test_data_dict[SNP_DSET] = ["NA", None, None, None]  # set bad snps
    setup_file.prep_test_file()
    validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
    valid_data = validator.validate_data()
    self.assertEqual(len(validator.bad_rows), 4)
    self.assertFalse(valid_data)
def test_validate_bad_effect_allele_file_data(self):
    # Two of the four effect alleles are invalid: two bad rows are expected
    # and validate_data() should fail.
    test_filename = "bad_effect.tsv"
    test_filepath = os.path.join(self.test_storepath, test_filename)
    # Swap only the extension for ".LOG". str.replace('tsv', 'LOG') would also
    # rewrite any "tsv" that happens to occur in the directory part of the
    # path.
    logfile = os.path.splitext(test_filepath)[0] + ".LOG"
    setup_file = prep.SSTestFile(filename=test_filename)
    setup_file.set_test_data_dict()
    setup_file.test_data_dict[EFFECT_DSET] = ['A', 'AGG', 'INS:T', 'd']  # set 2 bad alleles
    setup_file.prep_test_file()
    validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
    valid_data = validator.validate_data()
    self.assertEqual(len(validator.bad_rows), 2)
    self.assertFalse(valid_data)
def test_validate_bad_optional_hazard_ratio_file_data(self):
    # One non-numeric hazard ratio among four values: one bad row is expected
    # and validate_data() should fail.
    test_filename = "bad_hazard.tsv"
    test_filepath = os.path.join(self.test_storepath, test_filename)
    # Swap only the extension for ".LOG". str.replace('tsv', 'LOG') would also
    # rewrite any "tsv" that happens to occur in the directory part of the
    # path.
    logfile = os.path.splitext(test_filepath)[0] + ".LOG"
    setup_file = prep.SSTestFile(filename=test_filename)
    setup_file.set_test_data_dict()
    setup_file.test_data_dict[HR_DSET] = [1.1232e-23, "invalid", 0.123, .3245]  # set 1 bad hazard ratio
    setup_file.prep_test_file()
    validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
    valid_data = validator.validate_data()
    self.assertEqual(len(validator.bad_rows), 1)
    self.assertFalse(valid_data)
def test_validate_bad_bp_file_data(self):
    # Two of the four base-pair values are invalid: two bad rows are expected
    # and validate_data() should fail.
    test_filename = "bad_bp.tsv"
    test_filepath = os.path.join(self.test_storepath, test_filename)
    # Swap only the extension for ".LOG". str.replace('tsv', 'LOG') would also
    # rewrite any "tsv" that happens to occur in the directory part of the
    # path.
    logfile = os.path.splitext(test_filepath)[0] + ".LOG"
    setup_file = prep.SSTestFile(filename=test_filename)
    setup_file.set_test_data_dict()
    setup_file.test_data_dict[BP_DSET] = [1, 1234567890, "CHR1_122334", 123245]  # set 2 bad bps
    setup_file.prep_test_file()
    validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
    valid_data = validator.validate_data()
    self.assertEqual(len(validator.bad_rows), 2)
    self.assertFalse(valid_data)
def test_validate_bad_chr_and_no_snp_file_data(self):
    # Bad chromosome values combined with bad SNP ids leave a single good
    # row: three bad rows are expected and validate_data() should fail.
    test_filename = "bad_chr_no_snp.tsv"
    test_filepath = os.path.join(self.test_storepath, test_filename)
    # Swap only the extension for ".LOG". str.replace('tsv', 'LOG') would also
    # rewrite any "tsv" that happens to occur in the directory part of the
    # path.
    logfile = os.path.splitext(test_filepath)[0] + ".LOG"
    setup_file = prep.SSTestFile(filename=test_filename)
    setup_file.set_test_data_dict()
    setup_file.test_data_dict[CHR_DSET] = [1, 123, "CHR1", "X"]  # set 2 bad chrs
    setup_file.test_data_dict[SNP_DSET] = ["invalid", 123, "rs1234", "rs151"]  # set only one good row
    setup_file.prep_test_file()
    validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
    valid_data = validator.validate_data()
    self.assertEqual(len(validator.bad_rows), 3)
    self.assertFalse(valid_data)
def test_validate_bad_snp_and_no_pos_file_data(self):
    # Bad SNP ids combined with missing base-pair positions leave a single
    # good row: three bad rows are expected and validate_data() should fail.
    test_filename = "bad_snp_no_pos.tsv"
    test_filepath = os.path.join(self.test_storepath, test_filename)
    # Swap only the extension for ".LOG". str.replace('tsv', 'LOG') would also
    # rewrite any "tsv" that happens to occur in the directory part of the
    # path.
    logfile = os.path.splitext(test_filepath)[0] + ".LOG"
    setup_file = prep.SSTestFile(filename=test_filename)
    setup_file.set_test_data_dict()
    setup_file.test_data_dict[SNP_DSET] = ["invalid", "rs123", "1_1234_A_G", "ss151232"]  # set bad snps
    setup_file.test_data_dict[BP_DSET] = [None, 123, "NA", None]  # only one good row
    setup_file.prep_test_file()
    validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
    valid_data = validator.validate_data()
    self.assertEqual(len(validator.bad_rows), 3)
    self.assertFalse(valid_data)
def test_validate_bad_chr_file_data(self):
    # Two of the four chromosome values are invalid: two bad rows are
    # expected and validate_data() should fail for "gwas-upload".
    test_filename = "bad_chr.tsv"
    test_filepath = os.path.join(self.test_storepath, test_filename)
    # Swap only the extension for ".LOG". str.replace('tsv', 'LOG') would also
    # rewrite any "tsv" that happens to occur in the directory part of the
    # path.
    logfile = os.path.splitext(test_filepath)[0] + ".LOG"
    setup_file = prep.SSTestFile(filename=test_filename)
    setup_file.set_test_data_dict()
    setup_file.test_data_dict[CHR_DSET] = [1, 123, "CHR1", "X"]  # set 2 bad chrs
    setup_file.prep_test_file()
    validator = v.Validator(file=test_filepath, filetype="gwas-upload", logfile=logfile)
    valid_data = validator.validate_data()
    self.assertEqual(len(validator.bad_rows), 2)
    self.assertFalse(valid_data)
def test_validate_bad_pvalue_file_data(self):
    # All four p-values are invalid: four bad rows are expected and
    # validate_data() should fail for "gwas-upload".
    test_filename = "bad_pval.tsv"
    test_filepath = os.path.join(self.test_storepath, test_filename)
    # Swap only the extension for ".LOG". str.replace('tsv', 'LOG') would also
    # rewrite any "tsv" that happens to occur in the directory part of the
    # path.
    logfile = os.path.splitext(test_filepath)[0] + ".LOG"
    setup_file = prep.SSTestFile(filename=test_filename)
    setup_file.set_test_data_dict()
    setup_file.test_data_dict[PVAL_DSET] = [
        "invalid",
        -123,
        "another string",
        1.5,
    ]  # set bad pvalue
    setup_file.prep_test_file()
    validator = v.Validator(file=test_filepath, filetype="gwas-upload", logfile=logfile)
    valid_data = validator.validate_data()
    self.assertEqual(len(validator.bad_rows), 4)
    self.assertFalse(valid_data)