def test_validate_good_file_extension(self):
     # Both the ".tsv.gz" and the ".csv.gz" file names must pass the
     # extension check; the first failing name aborts the test.
     for filename in ("test_file.tsv.gz", "test_file.csv.gz"):
         candidate = os.path.join(self.test_storepath, filename)
         checker = v.Validator(candidate, "pgs-upload", logfile=candidate + ".LOG")
         self.assertTrue(checker.validate_file_extension())
 def test_validate_bad_file_extension(self):
     # A ".zip" file name must be rejected by the extension check.
     zip_path = os.path.join(self.test_storepath, "test_file.zip")
     log_path = zip_path + ".LOG"
     checker = v.Validator(zip_path, "gwas-upload", logfile=log_path)
     self.assertFalse(checker.validate_file_extension())
 def test_validate_good_file_headers(self):
     # The unmodified SSTestFile fixture is expected to pass the header check.
     # NOTE(review): presumably prep_test_file() writes "test_file.tsv" into
     # the store path - confirm against prep.SSTestFile.
     tsv_path = os.path.join(self.test_storepath, "test_file.tsv")
     fixture = prep.SSTestFile()
     fixture.prep_test_file()
     checker = v.Validator(tsv_path, "pgs-upload", logfile=tsv_path + ".LOG")
     self.assertTrue(checker.validate_headers())
 def test_validate_good_file_data(self):
     # The unmodified SSTestFile fixture is expected to pass data validation.
     test_filepath = os.path.join(self.test_storepath, "test_file.tsv")
     # Swap only the extension: str.replace('tsv', 'LOG') would also rewrite
     # any 'tsv' occurring in the directory part of the path.
     logfile = os.path.splitext(test_filepath)[0] + ".LOG"
     setup_file = prep.SSTestFile()
     setup_file.prep_test_file()
     validator = v.Validator(test_filepath, "pgs-upload", logfile=logfile)
     valid_data = validator.validate_data()
     self.assertTrue(valid_data)
예제 #5
0
    def validate_file(self):
        """Run the staged validation of ``self.store_path`` as a 'gwas-upload' file.

        The stages run in order (extension, headers, squareness, row count,
        data) and stop at the first one that fails. On failure
        ``self.validation_error`` is set to the code of the failing stage
        (6, 7, 8, 9 and 3 respectively). If a stage raises, the exception is
        logged and the code stays at the initial value of 3.

        Returns:
            True when every stage passes, False otherwise.
        """
        self.set_logfile()
        self.validation_error = 3

        # Only forward ``minrows`` when it is truthy so the validator keeps
        # its own default otherwise.
        validator_kwargs = {
            'file': self.store_path,
            'filetype': 'gwas-upload',
            'error_limit': 1,
            'logfile': self.logfile,
        }
        if self.minrows:
            validator_kwargs['minrows'] = self.minrows
        validator = val.Validator(**validator_kwargs)

        # (progress message, validator method name, messages logged on failure,
        #  error code). Methods are looked up by name inside the loop so that
        #  nothing is resolved before its stage is reached.
        stages = (
            ("Validating file extension...", "validate_file_extension",
             ("VALIDATION FAILED",), 6),
            ("Validating headers...", "validate_headers",
             ("Invalid headers...exiting before any further checks",
              "VALIDATION FAILED"), 7),
            ("Validating file for squareness...", "validate_file_squareness",
             ("Rows are malformed..exiting before any further checks",), 8),
            ("Validating rows...", "validate_rows",
             ("File contains too few rows..exiting before any further checks",), 9),
            ("Validating data...", "validate_data",
             ("VALIDATION FAILED",), 3),
        )

        try:
            for progress_msg, method_name, failure_msgs, error_code in stages:
                logger.info(progress_msg)
                if getattr(validator, method_name)():
                    continue
                for failure_msg in failure_msgs:
                    logger.info(failure_msg)
                self.validation_error = error_code
                return False

            logger.info("VALIDATION SUCCESSFUL")
            return True
        except Exception as err:
            logger.error(err)
            logger.info("VALIDATION FAILED")
            return False
 def test_validate_bad_file_headers(self):
     # Dropping the mandatory EFFECT_DSET column must fail the header check.
     tsv_path = os.path.join(self.test_storepath, "test_file.tsv")
     fixture = prep.SSTestFile()
     fixture.set_test_data_dict()
     del fixture.test_data_dict[EFFECT_DSET]
     fixture.prep_test_file()
     checker = v.Validator(tsv_path, "pgs-upload", logfile=tsv_path + ".LOG")
     self.assertFalse(checker.validate_headers())
 def test_validate_file_headers_missing_snp(self):
     # Headers are still expected to validate when the SNP_DSET column is absent.
     tsv_path = os.path.join(self.test_storepath, "test_file.tsv")
     fixture = prep.SSTestFile()
     fixture.set_test_data_dict()
     del fixture.test_data_dict[SNP_DSET]
     fixture.prep_test_file()
     checker = v.Validator(tsv_path, "pgs-upload", logfile=tsv_path + ".LOG")
     self.assertTrue(checker.validate_headers())
 def test_validate_bad_file_headers_missing_effect(self):
     # With effect weight, odds ratio and hazard ratio columns all removed,
     # the header check must fail.
     tsv_path = os.path.join(self.test_storepath, "test_file.tsv")
     fixture = prep.SSTestFile()
     fixture.set_test_data_dict()
     for effect_column in (EFFECT_WEIGHT_DSET, OR_DSET, HR_DSET):
         del fixture.test_data_dict[effect_column]
     fixture.prep_test_file()
     checker = v.Validator(tsv_path, "pgs-upload", logfile=tsv_path + ".LOG")
     self.assertFalse(checker.validate_headers())
 def test_validate_empty_snp_file_data(self):
     # Every row gets an empty/"NA" SNP value, so all four rows are expected
     # to be reported as bad and data validation must fail.
     test_filename = "empty_snp.tsv"
     test_filepath = os.path.join(self.test_storepath, test_filename)
     # Swap only the extension: str.replace('tsv', 'LOG') would also rewrite
     # any 'tsv' occurring in the directory part of the path.
     logfile = os.path.splitext(test_filepath)[0] + ".LOG"
     setup_file = prep.SSTestFile(filename=test_filename)
     setup_file.set_test_data_dict()
     setup_file.test_data_dict[SNP_DSET] = ["NA", None, None, None]
     setup_file.prep_test_file()
     validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
     valid_data = validator.validate_data()
     self.assertEqual(len(validator.bad_rows), 4)
     self.assertFalse(valid_data)
 def test_validate_bad_effect_allele_file_data(self):
     # Two of the four effect alleles are expected to be rejected, so two
     # bad rows are reported and data validation must fail.
     test_filename = "bad_effect.tsv"
     test_filepath = os.path.join(self.test_storepath, test_filename)
     # Swap only the extension: str.replace('tsv', 'LOG') would also rewrite
     # any 'tsv' occurring in the directory part of the path.
     logfile = os.path.splitext(test_filepath)[0] + ".LOG"
     setup_file = prep.SSTestFile(filename=test_filename)
     setup_file.set_test_data_dict()
     setup_file.test_data_dict[EFFECT_DSET] = ['A', 'AGG', 'INS:T', 'd']
     setup_file.prep_test_file()
     validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
     valid_data = validator.validate_data()
     self.assertEqual(len(validator.bad_rows), 2)
     self.assertFalse(valid_data)
 def test_validate_bad_optional_hazard_ratio_file_data(self):
     # One of the four hazard ratios is a non-numeric string, so one bad row
     # is expected and data validation must fail.
     test_filename = "bad_hazard.tsv"
     test_filepath = os.path.join(self.test_storepath, test_filename)
     # Swap only the extension: str.replace('tsv', 'LOG') would also rewrite
     # any 'tsv' occurring in the directory part of the path.
     logfile = os.path.splitext(test_filepath)[0] + ".LOG"
     setup_file = prep.SSTestFile(filename=test_filename)
     setup_file.set_test_data_dict()
     setup_file.test_data_dict[HR_DSET] = [1.1232e-23, "invalid", 0.123, .3245]
     setup_file.prep_test_file()
     validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
     valid_data = validator.validate_data()
     self.assertEqual(len(validator.bad_rows), 1)
     self.assertFalse(valid_data)
 def test_validate_bad_bp_file_data(self):
     # Two of the four base-pair positions are expected to be rejected, so
     # two bad rows are reported and data validation must fail.
     test_filename = "bad_bp.tsv"
     test_filepath = os.path.join(self.test_storepath, test_filename)
     # Swap only the extension: str.replace('tsv', 'LOG') would also rewrite
     # any 'tsv' occurring in the directory part of the path.
     logfile = os.path.splitext(test_filepath)[0] + ".LOG"
     setup_file = prep.SSTestFile(filename=test_filename)
     setup_file.set_test_data_dict()
     setup_file.test_data_dict[BP_DSET] = [1, 1234567890, "CHR1_122334", 123245]
     setup_file.prep_test_file()
     validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
     valid_data = validator.validate_data()
     self.assertEqual(len(validator.bad_rows), 2)
     self.assertFalse(valid_data)
 def test_validate_bad_chr_and_no_snp_file_data(self):
     # Chromosome and SNP columns are both overridden so that only one row
     # stays valid; three bad rows are expected and validation must fail.
     test_filename = "bad_chr_no_snp.tsv"
     test_filepath = os.path.join(self.test_storepath, test_filename)
     # Swap only the extension: str.replace('tsv', 'LOG') would also rewrite
     # any 'tsv' occurring in the directory part of the path.
     logfile = os.path.splitext(test_filepath)[0] + ".LOG"
     setup_file = prep.SSTestFile(filename=test_filename)
     setup_file.set_test_data_dict()
     setup_file.test_data_dict[CHR_DSET] = [1, 123, "CHR1", "X"]
     setup_file.test_data_dict[SNP_DSET] = ["invalid", 123, "rs1234", "rs151"]
     setup_file.prep_test_file()
     validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
     valid_data = validator.validate_data()
     self.assertEqual(len(validator.bad_rows), 3)
     self.assertFalse(valid_data)
 def test_validate_bad_snp_and_no_pos_file_data(self):
     # SNP and base-pair columns are both overridden so that only one row
     # stays valid; three bad rows are expected and validation must fail.
     test_filename = "bad_snp_no_pos.tsv"
     test_filepath = os.path.join(self.test_storepath, test_filename)
     # Swap only the extension: str.replace('tsv', 'LOG') would also rewrite
     # any 'tsv' occurring in the directory part of the path.
     logfile = os.path.splitext(test_filepath)[0] + ".LOG"
     setup_file = prep.SSTestFile(filename=test_filename)
     setup_file.set_test_data_dict()
     setup_file.test_data_dict[SNP_DSET] = ["invalid", "rs123", "1_1234_A_G", "ss151232"]
     setup_file.test_data_dict[BP_DSET] = [None, 123, "NA", None]
     setup_file.prep_test_file()
     validator = v.Validator(file=test_filepath, filetype="pgs-upload", logfile=logfile)
     valid_data = validator.validate_data()
     self.assertEqual(len(validator.bad_rows), 3)
     self.assertFalse(valid_data)
 def test_validate_bad_chr_file_data(self):
     # Two of the four chromosome values are expected to be rejected for a
     # "gwas-upload" file, so two bad rows are reported and validation fails.
     test_filename = "bad_chr.tsv"
     test_filepath = os.path.join(self.test_storepath, test_filename)
     # Swap only the extension: str.replace('tsv', 'LOG') would also rewrite
     # any 'tsv' occurring in the directory part of the path.
     logfile = os.path.splitext(test_filepath)[0] + ".LOG"
     setup_file = prep.SSTestFile(filename=test_filename)
     setup_file.set_test_data_dict()
     setup_file.test_data_dict[CHR_DSET] = [1, 123, "CHR1", "X"]
     setup_file.prep_test_file()
     validator = v.Validator(file=test_filepath,
                             filetype="gwas-upload",
                             logfile=logfile)
     valid_data = validator.validate_data()
     self.assertEqual(len(validator.bad_rows), 2)
     self.assertFalse(valid_data)
 def test_validate_bad_pvalue_file_data(self):
     # All four p-values are unusable (two strings, a negative number and a
     # value above 1), so four bad rows are expected and validation must fail.
     test_filename = "bad_pval.tsv"
     test_filepath = os.path.join(self.test_storepath, test_filename)
     # Swap only the extension: str.replace('tsv', 'LOG') would also rewrite
     # any 'tsv' occurring in the directory part of the path.
     logfile = os.path.splitext(test_filepath)[0] + ".LOG"
     setup_file = prep.SSTestFile(filename=test_filename)
     setup_file.set_test_data_dict()
     setup_file.test_data_dict[PVAL_DSET] = [
         "invalid", -123, "another string", 1.5
     ]
     setup_file.prep_test_file()
     validator = v.Validator(file=test_filepath,
                             filetype="gwas-upload",
                             logfile=logfile)
     valid_data = validator.validate_data()
     self.assertEqual(len(validator.bad_rows), 4)
     self.assertFalse(valid_data)