def test_valid_submission_yaml(self):
    """Run the validator on the valid fixture using pre-parsed YAML documents."""
    print(
        '___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission___')

    self.validator = SubmissionFileValidator()
    valid_sub_yaml = os.path.join(self.base_dir, self.valid_file)

    # yaml.load_all returns a lazy generator, so the file must stay open
    # while validate() consumes the documents; the with-block then
    # guarantees the handle is closed even if validation raises.
    with open(valid_sub_yaml, 'r') as sub_yaml_file:
        sub_yaml_obj = yaml.load_all(sub_yaml_file, Loader=Loader)
        self.validator.validate(file_path=valid_sub_yaml,
                                data=sub_yaml_obj)

    self.validator.print_errors(valid_sub_yaml)
def test_valid_submission_yaml_with_associated_records(self):
    """A submission that lists associated records validates without errors."""
    # print() with a single argument behaves the same on Python 2 and 3.
    print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission with associated records___')

    self.validator = SubmissionFileValidator()
    valid_sub_yaml = os.path.join(self.base_dir,
                                  self.valid_file_with_associated_records)

    is_valid = self.validator.validate(file_path=valid_sub_yaml)
    # Print collected errors before asserting so that a failing run shows
    # why the file was rejected.
    self.validator.print_errors(valid_sub_yaml)

    self.assertTrue(is_valid)
    self.assertFalse(self.validator.has_errors(valid_sub_yaml))
def test_ioerror_submission_yaml(self):
    """validate() is expected to return False for an unreadable path.

    The path is the valid fixture name minus its last character, which
    presumably does not exist on disk.
    """
    print('___SUBMISSION_FILE_VALIDATION: '
          'Testing ioerror yaml submission___')

    self.validator = None
    self.validator = SubmissionFileValidator()

    truncated_name = self.valid_file[:-1]
    missing_path = os.path.join(self.base_dir, truncated_name)

    outcome = self.validator.validate(file_path=missing_path)
    self.assertEqual(outcome, False)
    self.validator.print_errors(missing_path)
def test_valid_submission_yaml_with_empty_section(self):
    """The 'valid_submission_empty' fixture is expected to validate."""
    print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml '
          'submission without main section___')

    self.validator = None
    self.validator = SubmissionFileValidator()

    fixture_path = os.path.join(self.base_dir, self.valid_empty_file)
    outcome = self.validator.validate(file_path=fixture_path)
    self.assertEqual(outcome, True)
    self.validator.print_errors(fixture_path)
def get_submission_validator(old_schema):
    """
    Build the validator used for a submission.yaml file.

    :param old_schema: truthy to validate against schema version 0.1.0;
        otherwise SubmissionFileValidator is created with no arguments
    :return: SubmissionFileValidator object
    """
    validator_kwargs = {'schema_version': '0.1.0'} if old_schema else {}
    return SubmissionFileValidator(**validator_kwargs)
def test_valid_submission_yaml_with_license(self):
    """The license fixture is expected to validate successfully."""
    print('___SUBMISSION_FILE_VALIDATION: '
          'Testing valid yaml submission with license___')

    self.validator = None
    self.validator = SubmissionFileValidator()

    fixture_path = os.path.join(self.base_dir, self.valid_license_file)
    outcome = self.validator.validate(file_path=fixture_path)

    # Errors are printed before the assertion so a failure is diagnosable.
    self.validator.print_errors(fixture_path)
    self.assertEqual(outcome, True)
def test_valid_submission_yaml(self):
    """Run the validator on the valid fixture using pre-parsed YAML documents."""
    # print() with a single argument behaves the same on Python 2 and 3.
    print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission___')

    self.validator = SubmissionFileValidator()
    valid_sub_yaml = os.path.join(self.base_dir, self.valid_file)

    # safe_load_all replaces load_all without a Loader, which can build
    # arbitrary Python objects and is rejected by PyYAML >= 6.  The result
    # is a lazy generator, so validation runs while the file is still open.
    with open(valid_sub_yaml, 'r') as sub_yaml_file:
        sub_yaml_obj = yaml.safe_load_all(sub_yaml_file)
        self.validator.validate(file_path=valid_sub_yaml,
                                data=sub_yaml_obj)

    self.validator.print_errors(valid_sub_yaml)
def test_invalid_schema_file():
    """A listed schema version with no schema file raises ValueError."""
    # Fudge the schema versions constant so we can check the file check works
    fake_version = '0.9999.9999'
    VALID_SCHEMA_VERSIONS.append(fake_version)
    try:
        with pytest.raises(ValueError) as excinfo:
            SubmissionFileValidator(schema_version=fake_version)

        # Must sit outside the raises-block: statements after the raising
        # call inside the block would never execute.
        assert "Invalid schema file" in str(excinfo.value)
    finally:
        # Always undo the mutation of the shared module-level list.
        VALID_SCHEMA_VERSIONS.pop()
def test_valid_submission_yaml_with_empty_section(self):
    """The 'valid_submission_empty' fixture is expected to validate."""
    # print() with a single argument behaves the same on Python 2 and 3.
    print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml '
          'submission without main section___')

    self.validator = SubmissionFileValidator()
    valid_sub_yaml = os.path.join(self.base_dir, self.valid_empty_file)

    is_valid = self.validator.validate(file_path=valid_sub_yaml)
    # Print collected errors before asserting so that a failing run shows
    # why the file was rejected.
    self.validator.print_errors(valid_sub_yaml)
    self.assertEqual(is_valid, True)
def test_invalid_syntax(self):
    """A file with broken YAML syntax yields exactly one parsing error."""
    self.validator = SubmissionFileValidator()
    invalid_syntax_file = os.path.join(self.base_dir,
                                       self.invalid_syntax_file)

    self.assertFalse(
        self.validator.validate(file_path=invalid_syntax_file))
    self.assertTrue(self.validator.has_errors(invalid_syntax_file))

    # assertEqual reports the actual count on failure.
    file_messages = self.validator.get_messages(invalid_syntax_file)
    self.assertEqual(len(file_messages), 1)
    self.validator.print_errors(invalid_syntax_file)

    for message in file_messages:
        print(message.message)
        # startswith gives an assertion failure where str.index() would
        # raise ValueError when the text is absent.
        self.assertTrue(message.message.startswith(
            "There was a problem parsing the file."))

    self.assertEqual(len(self.validator.get_messages()), 1)
    self.validator.clear_messages()
    self.assertEqual(len(self.validator.get_messages()), 0)
def test_invalid_submission_yaml(self):
    """The invalid fixture is expected to be rejected by the validator."""
    # print() with a single argument behaves the same on Python 2 and 3.
    print('___SUBMISSION_FILE_VALIDATION: '
          'Testing invalid yaml submission___')

    self.validator = SubmissionFileValidator()
    invalid_sub_yaml = os.path.join(self.base_dir, self.invalid_file)

    is_valid = self.validator.validate(file_path=invalid_sub_yaml)
    # Print the collected errors before asserting so the reasons for the
    # rejection are visible in the test output either way.
    self.validator.print_errors(invalid_sub_yaml)
    self.assertEqual(is_valid, False)
def test_valid_submission_yaml_with_license(self):
    """The license fixture is expected to validate successfully."""
    # print() with a single argument behaves the same on Python 2 and 3.
    print('___SUBMISSION_FILE_VALIDATION: '
          'Testing valid yaml submission with license___')

    self.validator = SubmissionFileValidator()
    valid_sub_yaml = os.path.join(self.base_dir, self.valid_license_file)

    is_valid = self.validator.validate(file_path=valid_sub_yaml)
    # Print collected errors before asserting so that a failing run shows
    # why the file was rejected.
    self.validator.print_errors(valid_sub_yaml)
    self.assertEqual(is_valid, True)
def test_invalid_syntax(self):
    """A file with broken YAML syntax yields exactly one parsing error."""
    self.validator = SubmissionFileValidator()
    invalid_syntax_file = os.path.join(self.base_dir,
                                       self.invalid_syntax_file)

    self.assertFalse(
        self.validator.validate(file_path=invalid_syntax_file))
    self.assertTrue(self.validator.has_errors(invalid_syntax_file))

    # assertEqual reports the actual count on failure.
    file_messages = self.validator.get_messages(invalid_syntax_file)
    self.assertEqual(len(file_messages), 1)
    self.validator.print_errors(invalid_syntax_file)

    for message in file_messages:
        # print() with a single argument behaves the same on Python 2 and 3.
        print(message.message)
        # startswith gives an assertion failure where str.index() would
        # raise ValueError when the text is absent.
        self.assertTrue(message.message.startswith(
            "There was a problem parsing the file."))

    self.assertEqual(len(self.validator.get_messages()), 1)
    self.validator.clear_messages()
    self.assertEqual(len(self.validator.get_messages()), 0)
def process_submission_directory(basepath, submission_file_path, recid,
                                 update=False, *args, **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath: directory against which the data files and local
        additional resources named in the submission file are resolved
    :param submission_file_path: path to submission.yaml, or None when the
        upload did not contain one
    :param recid: publication record id the submission belongs to
    :param update: forwarded to reserve_doi_for_hepsubmission; when falsy, a
        submission whose status is 'finished' is reopened as a new version
    :return: dict of error lists keyed by file name; when no errors were
        collected, whatever package_submission returns
    """
    if submission_file_path is None:
        # return an error
        return {
            "submission.yaml": [{
                "level": "error",
                "message": "No submission.yaml file found in submission."
            }]
        }

    added_file_names = []
    errors = {}

    submission_file_validator = SubmissionFileValidator()
    is_valid_submission_file = submission_file_validator.validate(
        file_path=submission_file_path)

    if not is_valid_submission_file:
        errors = process_validation_errors_for_display(
            submission_file_validator.get_messages())
        submission_file_validator.clear_messages()
        return errors

    # yaml.load_all is lazy, so the file has to stay open for the whole
    # loop; the with-block closes it even when processing raises.
    with open(submission_file_path, 'r') as submission_file:
        submission_processed = yaml.load_all(submission_file, Loader=Loader)

        # process file, extracting contents, and linking
        # the data record with the parent publication
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
        if hepsubmission is None:
            # NOTE(review): this branch dereferences hepsubmission while it
            # is None (AttributeError) and discards the new object anyway.
            # Left unchanged because the intended recovery is not visible
            # here - confirm what should happen for an unknown recid.
            HEPSubmission(publication_recid=recid,
                          overall_status='todo',
                          inspire_id=hepsubmission.inspire_id,
                          coordinator=kwargs.get('user_id') if 'user_id' in kwargs else int(current_user.get_id()),
                          version=hepsubmission.version + 1)

        # On a new upload, we reset the flag to notify reviewers
        hepsubmission.reviewers_notified = False

        # if it is finished and we receive an update,
        # then we need to reopen the submission to allow for revisions.
        if hepsubmission.overall_status == 'finished' and not update:
            # we create a new HEPSubmission object
            _rev_hepsubmission = HEPSubmission(
                publication_recid=recid,
                overall_status='todo',
                inspire_id=hepsubmission.inspire_id,
                coordinator=hepsubmission.coordinator,
                version=hepsubmission.version + 1)
            db.session.add(_rev_hepsubmission)
            hepsubmission = _rev_hepsubmission

        reserve_doi_for_hepsubmission(hepsubmission, update)

        no_general_submission_info = True

        data_file_validator = DataFileValidator()

        # Delete all data records associated with this submission.
        # Fixes problems with ordering where the table names are changed between uploads.
        # See https://github.com/HEPData/hepdata/issues/112
        # Side effect that reviews will be deleted between uploads.
        cleanup_submission(recid, hepsubmission.version, added_file_names)

        for yaml_document_index, yaml_document in enumerate(
                submission_processed):
            if not yaml_document:
                continue

            # Check for presence of local files given as additional_resources.
            if 'additional_resources' in yaml_document:
                for resource in yaml_document['additional_resources']:
                    location = os.path.join(basepath, resource['location'])
                    if not resource['location'].startswith(
                            ('http', '/resource/')):
                        if not os.path.isfile(location):
                            errors[resource['location']] = [{
                                "level": "error",
                                "message": "Missing 'additional_resources' file from uploaded archive."
                            }]
                        elif '/' in resource['location']:
                            errors[resource['location']] = [{
                                "level": "error",
                                "message": "Location of 'additional_resources' file should not contain '/'."
                            }]

            if not yaml_document_index and 'name' not in yaml_document:
                # First document without a 'name' key: the general
                # submission information.
                no_general_submission_info = False
                process_general_submission_info(basepath, yaml_document,
                                                recid)
                continue

            if not all(k in yaml_document for k in
                       ('name', 'description', 'keywords', 'data_file')):
                errors["submission.yaml"] = [{
                    "level": "error",
                    "message": "YAML document with index {} ".format(
                        yaml_document_index) +
                    "missing one or more required keys (name, description, keywords, data_file)."
                }]
                continue

            data_file_name = yaml_document["data_file"]

            existing_datasubmission_query = DataSubmission.query \
                .filter_by(name=encode_string(yaml_document["name"]),
                           publication_recid=recid,
                           version=hepsubmission.version)

            added_file_names.append(yaml_document["name"])

            try:
                if existing_datasubmission_query.count() == 0:
                    datasubmission = DataSubmission(
                        publication_recid=recid,
                        name=encode_string(yaml_document["name"]),
                        description=encode_string(
                            yaml_document["description"]),
                        version=hepsubmission.version)
                else:
                    datasubmission = existing_datasubmission_query.one()
                    datasubmission.description = encode_string(
                        yaml_document["description"])
                db.session.add(datasubmission)
            except SQLAlchemyError as sqlex:
                errors[data_file_name] = [{
                    "level": "error",
                    "message": str(sqlex)
                }]
                db.session.rollback()
                continue

            main_file_path = os.path.join(basepath, data_file_name)

            data, ex = _eos_fix_read_data(main_file_path)

            if not data or ex is not None:
                errors[data_file_name] = \
                    [{"level": "error",
                      "message": "There was a problem parsing the file.\n" + str(ex)}]
                continue

            if '/' in data_file_name:
                errors[data_file_name] = \
                    [{"level": "error",
                      "message": "Name of data_file should not contain '/'.\n"}]
                continue

            if data_file_validator.validate(file_path=main_file_path,
                                            data=data):
                try:
                    process_data_file(recid, hepsubmission.version, basepath,
                                      yaml_document, datasubmission,
                                      main_file_path)
                except SQLAlchemyError as sqlex:
                    errors[data_file_name] = [{
                        "level": "error",
                        "message": "There was a problem processing the file.\n" + str(sqlex)
                    }]
                    db.session.rollback()
            else:
                # Merge instead of rebinding `errors`, so that problems
                # recorded for earlier documents are still returned.
                errors.update(process_validation_errors_for_display(
                    data_file_validator.get_messages()))
                data_file_validator.clear_messages()

            if data_file_name not in errors:
                # Check that the length of the 'values' list is consistent
                # for each of the independent_variables and dependent_variables.
                indep_count = [
                    len(indep['values'])
                    for indep in data['independent_variables']
                ]
                dep_count = [
                    len(dep['values'])
                    for dep in data['dependent_variables']
                ]
                # more than one unique count means inconsistent lengths
                if len(set(indep_count + dep_count)) > 1:
                    errors.setdefault(data_file_name, []).append({
                        "level": "error",
                        "message": "Inconsistent length of 'values' list:\n" +
                        "independent_variables{}, dependent_variables{}"
                        .format(str(indep_count), str(dep_count))
                    })

    if no_general_submission_info:
        hepsubmission.last_updated = datetime.now()
        db.session.add(hepsubmission)
        db.session.commit()

    # cleanup_submission is not called here with added_file_names because
    # that does not preserve the order of tables. All tables are deleted
    # above instead: side effect of deleting reviews between uploads.
    db.session.commit()

    # Truthiness test instead of `len(errors) is 0`, which compares identity
    # with an int literal.
    if not errors:
        errors = package_submission(basepath, recid, hepsubmission)
        reserve_dois_for_data_submissions(
            publication_recid=recid, version=hepsubmission.version)

        admin_indexer = AdminIndexer()
        admin_indexer.index_submission(hepsubmission)
    else:
        # delete all tables if errors
        cleanup_submission(recid, hepsubmission.version, {})

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors
def test_no_file_path_supplied(self):
    """validate() must raise LookupError when file_path is None."""
    self.validator = SubmissionFileValidator()

    # assertRaises fails the test when nothing is raised; a try/except
    # around the call would silently pass in that case.
    with self.assertRaises(LookupError):
        self.validator.validate(file_path=None)
class SubmissionFileValidationTest(unittest.TestCase):
    """Exercise SubmissionFileValidator against the fixtures in test_data/."""

    validator = None

    def setUp(self):
        # Fixture paths are relative to the directory holding this module.
        self.base_dir = os.path.dirname(os.path.realpath(__file__))

        self.valid_license_file = 'test_data/valid_submission_license.yaml'
        self.valid_file = 'test_data/valid_submission.yaml'
        self.valid_file_with_associated_records = 'test_data/valid_submission_with_associated_record.yaml'
        self.valid_empty_file = 'test_data/valid_submission_empty.yaml'
        self.invalid_file = 'test_data/invalid_submission.yaml'
        self.invalid_syntax_file = 'test_data/invalid_syntax_submission.yaml'
        self.invalid_parser_file = 'test_data/invalid_parser_submission.yaml'

    def test_valid_submission_yaml(self):
        """Run the validator on the valid fixture using pre-parsed YAML."""
        print(
            '___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_file)

        # yaml.load_all is lazy, so validate() has to run while the file is
        # still open; the with-block closes the handle afterwards.
        with open(valid_sub_yaml, 'r') as sub_yaml_file:
            sub_yaml_obj = yaml.load_all(sub_yaml_file, Loader=Loader)
            self.validator.validate(file_path=valid_sub_yaml,
                                    data=sub_yaml_obj)

        self.validator.print_errors(valid_sub_yaml)

    def test_no_file_path_supplied(self):
        """validate() must raise LookupError when file_path is None."""
        self.validator = SubmissionFileValidator()

        # assertRaises fails the test when nothing is raised; a try/except
        # around the call would silently pass in that case.
        with self.assertRaises(LookupError):
            self.validator.validate(file_path=None)

    def test_invalid_syntax(self):
        """A file with broken YAML syntax yields exactly one parsing error."""
        self.validator = SubmissionFileValidator()
        invalid_syntax_file = os.path.join(self.base_dir,
                                           self.invalid_syntax_file)

        self.assertFalse(
            self.validator.validate(file_path=invalid_syntax_file))
        self.assertTrue(self.validator.has_errors(invalid_syntax_file))

        file_messages = self.validator.get_messages(invalid_syntax_file)
        self.assertEqual(len(file_messages), 1)
        self.validator.print_errors(invalid_syntax_file)

        for message in file_messages:
            print(message.message)
            # startswith gives an assertion failure where str.index() would
            # raise ValueError when the text is absent.
            self.assertTrue(message.message.startswith(
                "There was a problem parsing the file."))

        self.assertEqual(len(self.validator.get_messages()), 1)
        self.validator.clear_messages()
        self.assertEqual(len(self.validator.get_messages()), 0)

    def test_valid_submission_yaml_with_associated_records(self):
        """A submission listing associated records validates cleanly."""
        print(
            '___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission with associated records___'
        )

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir,
                                      self.valid_file_with_associated_records)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        self.validator.print_errors(valid_sub_yaml)

        self.assertTrue(is_valid)
        self.assertFalse(self.validator.has_errors(valid_sub_yaml))

    def test_valid_submission_yaml_with_empty_section(self):
        """The 'valid_submission_empty' fixture is expected to validate."""
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml '
              'submission without main section___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_empty_file)

        # Print collected errors before asserting so a failure is diagnosable.
        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        self.validator.print_errors(valid_sub_yaml)
        self.assertEqual(is_valid, True)

    def test_valid_submission_yaml_with_license(self):
        """The license fixture is expected to validate successfully."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing valid yaml submission with license___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_license_file)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        self.validator.print_errors(valid_sub_yaml)
        self.assertEqual(is_valid, True)

    def test_invalid_submission_yaml(self):
        """The invalid fixture is expected to be rejected."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing invalid yaml submission___')

        self.validator = SubmissionFileValidator()
        invalid_sub_yaml = os.path.join(self.base_dir, self.invalid_file)

        is_valid = self.validator.validate(file_path=invalid_sub_yaml)
        self.validator.print_errors(invalid_sub_yaml)
        self.assertEqual(is_valid, False)

    def test_invalid_parser_submission_yaml(self):
        """The 'invalid_parser' fixture is expected to be rejected."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing invalid parser yaml submission___')

        self.validator = SubmissionFileValidator()
        invalid_sub_yaml = os.path.join(self.base_dir,
                                        self.invalid_parser_file)

        is_valid = self.validator.validate(file_path=invalid_sub_yaml)
        self.validator.print_errors(invalid_sub_yaml)
        self.assertEqual(is_valid, False)

    def test_ioerror_submission_yaml(self):
        """validate() is expected to return False for an unreadable path."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing ioerror yaml submission___')

        self.validator = SubmissionFileValidator()
        # Dropping the last character of the valid fixture name gives a
        # path that presumably does not exist.
        invalid_sub_yaml = os.path.join(self.base_dir, self.valid_file[:-1])

        is_valid = self.validator.validate(file_path=invalid_sub_yaml)
        self.validator.print_errors(invalid_sub_yaml)
        self.assertEqual(is_valid, False)
# Usage example: validate a submission.yaml file and report any problems.
from hepdata_validator.submission_file_validator import SubmissionFileValidator

submission_file_validator = SubmissionFileValidator()
submission_file_path = 'submission/submission.yaml'

# the validate method takes a string representing the file path;
# its result is stored as the validity flag for the file.
is_valid_submission_file = submission_file_validator.validate(file_path=submission_file_path)

# if there are any error messages, they are retrievable through this call
# (the return value is discarded here; assign it to inspect the messages)
submission_file_validator.get_messages()

# the error messages recorded for this file can be printed
submission_file_validator.print_errors(submission_file_path)
'/', '-') + '.yaml' doc['data_file'] = file_name with open(file_name, 'w') as data_file: yaml.dump( { 'independent_variables': doc.pop('independent_variables', None), 'dependent_variables': doc.pop('dependent_variables', None) }, data_file, Dumper=Dumper) # Validate the submission.yaml file if validator imported. if validator_imported: submission_file_validator = SubmissionFileValidator() is_valid_submission_file = submission_file_validator.validate( file_path=submission_file_path, data=docs) if not is_valid_submission_file: print('%s is invalid HEPData YAML.' % submission_file_path) submission_file_validator.print_errors(submission_file_path) quit() else: print('%s is valid HEPData YAML.' % submission_file_path) # Loop over all YAML documents in the submission.yaml file. for doc in docs: # Skip empty YAML documents. if not doc: continue
class SubmissionFileValidationTest(unittest.TestCase):
    """Exercise SubmissionFileValidator against the fixtures in test_data/.

    All print calls use the single-argument function form, which behaves
    the same on Python 2 and Python 3.
    """

    validator = None

    def setUp(self):
        # Fixture paths are relative to the directory holding this module.
        self.base_dir = os.path.dirname(os.path.realpath(__file__))

        self.valid_license_file = 'test_data/valid_submission_license.yaml'
        self.valid_file = 'test_data/valid_submission.yaml'
        self.valid_file_with_associated_records = 'test_data/valid_submission_with_associated_record.yaml'
        self.valid_empty_file = 'test_data/valid_submission_empty.yaml'
        self.invalid_file = 'test_data/invalid_submission.yaml'
        self.invalid_syntax_file = 'test_data/invalid_syntax_submission.yaml'

    def test_valid_submission_yaml(self):
        """Run the validator on the valid fixture using pre-parsed YAML."""
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_file)

        # safe_load_all replaces load_all without a Loader, which can build
        # arbitrary Python objects and is rejected by PyYAML >= 6.  The
        # result is lazy, so validate() runs while the file is still open.
        with open(valid_sub_yaml, 'r') as sub_yaml_file:
            sub_yaml_obj = yaml.safe_load_all(sub_yaml_file)
            self.validator.validate(file_path=valid_sub_yaml,
                                    data=sub_yaml_obj)

        self.validator.print_errors(valid_sub_yaml)

    def test_no_file_path_supplied(self):
        """validate() must raise LookupError when file_path is None."""
        self.validator = SubmissionFileValidator()

        # assertRaises fails the test when nothing is raised; a try/except
        # around the call would silently pass in that case.
        with self.assertRaises(LookupError):
            self.validator.validate(file_path=None)

    def test_invalid_syntax(self):
        """A file with broken YAML syntax yields exactly one parsing error."""
        self.validator = SubmissionFileValidator()
        invalid_syntax_file = os.path.join(self.base_dir,
                                           self.invalid_syntax_file)

        self.assertFalse(
            self.validator.validate(file_path=invalid_syntax_file))
        self.assertTrue(self.validator.has_errors(invalid_syntax_file))

        file_messages = self.validator.get_messages(invalid_syntax_file)
        self.assertEqual(len(file_messages), 1)
        self.validator.print_errors(invalid_syntax_file)

        for message in file_messages:
            print(message.message)
            # startswith gives an assertion failure where str.index() would
            # raise ValueError when the text is absent.
            self.assertTrue(message.message.startswith(
                "There was a problem parsing the file."))

        self.assertEqual(len(self.validator.get_messages()), 1)
        self.validator.clear_messages()
        self.assertEqual(len(self.validator.get_messages()), 0)

    def test_valid_submission_yaml_with_associated_records(self):
        """A submission listing associated records validates cleanly."""
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission with associated records___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir,
                                      self.valid_file_with_associated_records)

        # Print collected errors before asserting so a failure is diagnosable.
        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        self.validator.print_errors(valid_sub_yaml)

        self.assertTrue(is_valid)
        self.assertFalse(self.validator.has_errors(valid_sub_yaml))

    def test_valid_submission_yaml_with_empty_section(self):
        """The 'valid_submission_empty' fixture is expected to validate."""
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml '
              'submission without main section___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_empty_file)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        self.validator.print_errors(valid_sub_yaml)
        self.assertEqual(is_valid, True)

    def test_valid_submission_yaml_with_license(self):
        """The license fixture is expected to validate successfully."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing valid yaml submission with license___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_license_file)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        self.validator.print_errors(valid_sub_yaml)
        self.assertEqual(is_valid, True)

    def test_invalid_submission_yaml(self):
        """The invalid fixture is expected to be rejected."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing invalid yaml submission___')

        self.validator = SubmissionFileValidator()
        invalid_sub_yaml = os.path.join(self.base_dir, self.invalid_file)

        is_valid = self.validator.validate(file_path=invalid_sub_yaml)
        self.validator.print_errors(invalid_sub_yaml)
        self.assertEqual(is_valid, False)
def parse(self, data_in, *args, **kwargs):
    """
    Validate and load a submission file together with its table files.

    :param data_in: path to submission.yaml, or to a directory containing
        submission.yaml / submission.yml
    :param args: unused
    :param kwargs: unused
    :return: ParsedData built from the submission-wide metadata document
        and one Table per document that names a data_file
    :raise ValueError: if data_in does not exist, or is a directory
        without a submission file
    :raise RuntimeError: if the submission file is empty, or the submission
        file or any data file fails validation
    """
    if not os.path.exists(data_in):
        raise ValueError("File / Directory does not exist: %s" % data_in)

    if os.path.isdir(data_in):
        submission_filepath = os.path.join(data_in, 'submission.yaml')
        if not os.path.exists(submission_filepath):
            submission_filepath = os.path.join(data_in, 'submission.yml')
            if not os.path.exists(submission_filepath):
                raise ValueError("No submission file in %s" % data_in)
        data_in = submission_filepath

    # first validate submission file:
    with open(data_in, 'r') as submission_file:
        # list() consumes the lazy load_all generator while the file is open
        submission_data = list(
            yaml.load_all(submission_file, Loader=Loader))

    if not submission_data:
        raise RuntimeError("Submission file (%s) is empty" % data_in)

    submission_file_validator = SubmissionFileValidator(
        schema_version=self.validator_schema_version)
    if not submission_file_validator.validate(file_path=data_in,
                                              data=submission_data):
        raise RuntimeError(
            "Submission file (%s) did not pass validation: %s" %
            (data_in, self._pretty_print_errors(
                submission_file_validator.get_messages())))

    metadata = {}
    tables = []
    table_dir = os.path.dirname(data_in)

    # validator for table data
    data_file_validator = DataFileValidator(
        schema_version=self.validator_schema_version)

    index = 0
    for document in submission_data:
        if not document:  # empty YAML document
            continue

        if 'data_file' not in document:
            metadata = document  # information about whole submission
            continue

        table_filepath = os.path.join(table_dir, document['data_file'])

        # A missing table file surfaces as the error raised by open().
        # The former os.path.exists() check inside this with-block could
        # never trigger, and its message used the unbound name `table`.
        with open(table_filepath, 'r') as table_file:
            table_data = yaml.load(table_file, Loader=Loader)

        if not data_file_validator.validate(data=table_data,
                                            file_path=table_filepath):
            raise RuntimeError(
                "Data file (%s) did not pass validation: %s" %
                (table_filepath, self._pretty_print_errors(
                    data_file_validator.get_messages())))

        # Tables are numbered from 1 in order of appearance, skipping
        # empty and metadata documents.
        index = index + 1
        tables.append(Table(index=index, metadata=document,
                            data=table_data))

    return ParsedData(metadata, tables)
def process_submission_directory(basepath, submission_file_path, recid,
                                 update=False, *args, **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath: directory against which the data files named in the
        submission file are resolved
    :param submission_file_path: path to submission.yaml, or None when the
        upload did not contain one
    :param recid: publication record id the submission belongs to
    :param update: forwarded to reserve_doi_for_hepsubmission; when falsy, a
        submission whose status is 'finished' is reopened as a new version
    :return: dict of error lists keyed by file name (empty on success)
    """
    if submission_file_path is None:
        # return an error
        return {
            "submission.yaml": [{
                "level": "error",
                "message": "No submission.yaml file found in submission."
            }]
        }

    added_file_names = []
    errors = {}

    # CSafeLoader is only present when PyYAML was built with libyaml.
    # Selecting the loader up front replaces a bare `except:` around the
    # (lazy, non-raising) load_all call.
    yaml_loader = getattr(yaml, 'CSafeLoader', yaml.SafeLoader)

    # The with-block guarantees the handle is closed on every exit path.
    with open(submission_file_path, 'r') as submission_file:
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        data_file_validator = DataFileValidator()

        if not is_valid_submission_file:
            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())
            submission_file_validator.clear_messages()
            data_file_validator.clear_messages()
            return errors

        submission_processed = yaml.load_all(submission_file,
                                             Loader=yaml_loader)

        # process file, extracting contents, and linking
        # the data record with the parent publication
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
        if hepsubmission is None:
            # NOTE(review): this branch dereferences hepsubmission while it
            # is None (AttributeError) and discards the new object anyway.
            # Left unchanged because the intended recovery is not visible
            # here - confirm what should happen for an unknown recid.
            HEPSubmission(publication_recid=recid,
                          overall_status='todo',
                          inspire_id=hepsubmission.inspire_id,
                          coordinator=kwargs.get('user_id') if 'user_id' in kwargs else int(current_user.get_id()),
                          version=hepsubmission.version + 1)

        # On a new upload, we reset the flag to notify reviewers
        hepsubmission.reviewers_notified = False

        # if it is finished and we receive an update,
        # then we need to reopen the submission to allow for revisions.
        if hepsubmission.overall_status == 'finished' and not update:
            # we create a new HEPSubmission object
            _rev_hepsubmission = HEPSubmission(
                publication_recid=recid,
                overall_status='todo',
                inspire_id=hepsubmission.inspire_id,
                coordinator=hepsubmission.coordinator,
                version=hepsubmission.version + 1)
            db.session.add(_rev_hepsubmission)
            hepsubmission = _rev_hepsubmission

        reserve_doi_for_hepsubmission(hepsubmission, update)

        for yaml_document in submission_processed:
            if not yaml_document:
                # An empty YAML document loads as None; the membership
                # tests below would raise TypeError on it.
                continue

            if 'record_ids' in yaml_document or 'comment' in yaml_document or 'modifications' in yaml_document:
                # comments are only present in the general submission
                # information document.
                process_general_submission_info(basepath, yaml_document,
                                                recid)
                continue

            existing_datasubmission_query = DataSubmission.query \
                .filter_by(name=encode_string(yaml_document["name"]),
                           publication_recid=recid,
                           version=hepsubmission.version)

            added_file_names.append(yaml_document["name"])

            if existing_datasubmission_query.count() == 0:
                datasubmission = DataSubmission(
                    publication_recid=recid,
                    name=encode_string(yaml_document["name"]),
                    description=encode_string(
                        yaml_document["description"]),
                    version=hepsubmission.version)
            else:
                datasubmission = existing_datasubmission_query.one()
                datasubmission.description = encode_string(
                    yaml_document["description"])

            db.session.add(datasubmission)

            main_file_path = os.path.join(basepath,
                                          yaml_document["data_file"])

            if data_file_validator.validate(file_path=main_file_path):
                process_data_file(recid, hepsubmission.version, basepath,
                                  yaml_document, datasubmission,
                                  main_file_path)
            else:
                # Merge instead of rebinding `errors`, so that problems
                # recorded for earlier data files are still returned.
                errors.update(process_validation_errors_for_display(
                    data_file_validator.get_messages()))
                data_file_validator.clear_messages()

    cleanup_submission(recid, hepsubmission.version, added_file_names)

    db.session.commit()

    # Truthiness test instead of `len(errors) is 0`, which compares identity
    # with an int literal.
    if not errors:
        package_submission(basepath, recid, hepsubmission)
        reserve_dois_for_data_submissions(recid, hepsubmission.version)

        admin_indexer = AdminIndexer()
        admin_indexer.index_submission(hepsubmission)

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors
def test_invalid_schema_version():
    """An unknown schema version is rejected with a precise ValueError."""
    bad_version = '0.9999.99'
    with pytest.raises(ValueError) as excinfo:
        SubmissionFileValidator(schema_version=bad_version)

    # Must sit outside the raises-block: statements after the raising call
    # inside the block would never execute.
    assert "Invalid schema version 0.9999.99" == str(excinfo.value)
def validator_v1():
    """Return a SubmissionFileValidator created for schema version 1.0.1."""
    schema_version = '1.0.1'
    return SubmissionFileValidator(schema_version=schema_version)
def validator_v0():
    """Return a SubmissionFileValidator created for schema version 0.1.0."""
    schema_version = '0.1.0'
    return SubmissionFileValidator(schema_version=schema_version)
def parse(self, data_in, *args, **kwargs):
    """
    Validate and load a submission file together with its table files.

    The first YAML document is treated as the submission-wide metadata and
    every following document as a table description with a 'data_file'.

    :param data_in: path to submission.yaml, or to a directory containing
        submission.yaml / submission.yml
    :param args: unused
    :param kwargs: unused
    :return: ParsedData built from the first document and the tables
    :raise ValueError: if data_in does not exist, or is a directory
        without a submission file
    :raise RuntimeError: if the submission file is empty, or the submission
        file or any data file fails validation
    """
    if not os.path.exists(data_in):
        raise ValueError("File / Directory does not exist: %s" % data_in)

    if os.path.isdir(data_in):
        submission_filepath = os.path.join(data_in, 'submission.yaml')
        if not os.path.exists(submission_filepath):
            submission_filepath = os.path.join(data_in, 'submission.yml')
            if not os.path.exists(submission_filepath):
                raise ValueError("No submission file in %s" % data_in)
        data_in = submission_filepath

    # Prefer the C loader for speed; it only exists when PyYAML was built
    # with libyaml.  Choosing the loader up front replaces a bare `except:`
    # that also swallowed YAML errors and then re-read the already consumed
    # stream with the unsafe default loader.
    yaml_loader = getattr(yaml, 'CSafeLoader', yaml.SafeLoader)

    # first validate submission file:
    with open(data_in, 'r') as submission_file:
        # list() consumes the lazy load_all generator while the file is open
        submission_data = list(
            yaml.load_all(submission_file, Loader=yaml_loader))

    if not submission_data:
        raise RuntimeError("Submission file (%s) is empty" % data_in)

    submission_file_validator = SubmissionFileValidator()
    if not submission_file_validator.validate(file_path=data_in,
                                              data=submission_data):
        raise RuntimeError(
            "Submission file (%s) did not pass validation: %s" %
            (data_in, self._pretty_print_errors(
                submission_file_validator.get_messages())))

    tables = []
    table_dir = os.path.dirname(data_in)

    # validator for table data
    data_file_validator = DataFileValidator()

    # Document 0 is the metadata, so table indices start at 1.
    for i, table_metadata in enumerate(submission_data[1:], 1):
        table_filepath = os.path.join(table_dir,
                                      table_metadata['data_file'])

        # A missing table file surfaces as the error raised by open().
        # The former os.path.exists() check inside this with-block could
        # never trigger, and its message used the unbound name `table`.
        with open(table_filepath, 'r') as table_file:
            table_data = yaml.load(table_file, Loader=yaml_loader)

        if not data_file_validator.validate(data=table_data,
                                            file_path=table_filepath):
            raise RuntimeError(
                "Data file (%s) did not pass validation: %s" %
                (table_filepath, self._pretty_print_errors(
                    data_file_validator.get_messages())))

        tables.append(Table(index=i, metadata=table_metadata,
                            data=table_data))

    return ParsedData(submission_data[0], tables)
def process_submission_directory(basepath, submission_file_path, recid,
                                 update=False, *args, **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath: directory against which the data files named in the
        submission file are resolved
    :param submission_file_path: path to submission.yaml, or None when the
        upload did not contain one
    :param recid: publication record id the submission belongs to
    :param update: when falsy, a submission whose status is 'finished' is
        reopened as a new version
    :return: dict of error lists keyed by file name (empty on success)
    """
    if submission_file_path is None:
        # return an error
        return {"submission.yaml": [
            {"level": "error",
             "message": "No submission.yaml file found in submission."}
        ]}

    added_file_names = []
    errors = {}

    # CSafeLoader is only present when PyYAML was built with libyaml.
    # Selecting the loader up front replaces a bare `except:` around the
    # (lazy, non-raising) load_all call.
    yaml_loader = getattr(yaml, 'CSafeLoader', yaml.SafeLoader)

    # The with-block guarantees the handle is closed on every exit path.
    with open(submission_file_path, 'r') as submission_file:
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        data_file_validator = DataFileValidator()

        if not is_valid_submission_file:
            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())
            submission_file_validator.clear_messages()
            data_file_validator.clear_messages()
            return errors

        submission_processed = yaml.load_all(submission_file,
                                             Loader=yaml_loader)

        # process file, extracting contents, and linking
        # the data record with the parent publication
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
        if hepsubmission is None:
            # NOTE(review): this branch dereferences hepsubmission while it
            # is None (AttributeError) and discards the new object anyway.
            # Left unchanged because the intended recovery is not visible
            # here - confirm what should happen for an unknown recid.
            HEPSubmission(publication_recid=recid,
                          overall_status='todo',
                          inspire_id=hepsubmission.inspire_id,
                          coordinator=kwargs.get('user_id') if 'user_id' in kwargs else int(current_user.get_id()),
                          version=hepsubmission.version + 1)

        # On a new upload, we reset the flag to notify reviewers
        hepsubmission.reviewers_notified = False

        # if it is finished and we receive an update,
        # then we need to reopen the submission to allow for revisions.
        if hepsubmission.overall_status == 'finished' and not update:
            # we create a new HEPSubmission object
            _rev_hepsubmission = HEPSubmission(
                publication_recid=recid,
                overall_status='todo',
                inspire_id=hepsubmission.inspire_id,
                coordinator=hepsubmission.coordinator,
                version=hepsubmission.version + 1)
            db.session.add(_rev_hepsubmission)
            hepsubmission = _rev_hepsubmission

        reserve_doi_for_hepsubmission(hepsubmission)

        for yaml_document in submission_processed:
            if not yaml_document:
                # An empty YAML document loads as None; the membership
                # tests below would raise TypeError on it.
                continue

            if 'record_ids' in yaml_document or 'comment' in yaml_document or 'modifications' in yaml_document:
                # comments are only present in the general submission
                # information document.
                process_general_submission_info(basepath, yaml_document,
                                                recid)
                continue

            existing_datasubmission_query = DataSubmission.query \
                .filter_by(name=encode_string(yaml_document["name"]),
                           publication_recid=recid,
                           version=hepsubmission.version)

            added_file_names.append(yaml_document["name"])

            if existing_datasubmission_query.count() == 0:
                datasubmission = DataSubmission(
                    publication_recid=recid,
                    name=encode_string(yaml_document["name"]),
                    description=encode_string(
                        yaml_document["description"]),
                    version=hepsubmission.version)
            else:
                datasubmission = existing_datasubmission_query.one()
                datasubmission.description = encode_string(
                    yaml_document["description"])

            db.session.add(datasubmission)

            main_file_path = os.path.join(basepath,
                                          yaml_document["data_file"])

            if data_file_validator.validate(file_path=main_file_path):
                process_data_file(recid, hepsubmission.version, basepath,
                                  yaml_document, datasubmission,
                                  main_file_path)
            else:
                # Merge instead of rebinding `errors`, so that problems
                # recorded for earlier data files are still returned.
                errors.update(process_validation_errors_for_display(
                    data_file_validator.get_messages()))
                data_file_validator.clear_messages()

    cleanup_submission(recid, hepsubmission.version, added_file_names)

    db.session.commit()

    # Truthiness test instead of `len(errors) is 0`, which compares identity
    # with an int literal.
    if not errors:
        package_submission(basepath, recid, hepsubmission)
        reserve_dois_for_data_submissions(recid, hepsubmission.version)

        admin_indexer = AdminIndexer()
        admin_indexer.index_submission(hepsubmission)

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors