Пример #1
0
    def test_valid_submission_yaml(self):
        """Run the validator on the valid submission file using pre-loaded YAML.

        The YAML documents are parsed here and handed to the validator through
        its ``data`` argument; any recorded messages are printed afterwards.
        """
        print(
            '___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_file)

        # yaml.load_all returns a lazy generator, so the documents have to be
        # consumed while the file is still open. The ``with`` block closes the
        # handle afterwards instead of leaking it.
        with open(valid_sub_yaml, 'r') as sub_yaml_file:
            sub_yaml_obj = yaml.load_all(sub_yaml_file, Loader=Loader)
            self.validator.validate(file_path=valid_sub_yaml,
                                    data=sub_yaml_obj)
        self.validator.print_errors(valid_sub_yaml)
Пример #2
0
    def test_valid_submission_yaml_with_associated_records(self):
        """Check that the submission with associated records validates cleanly.

        The file must validate and must leave no errors registered against
        its path.
        """
        # Single-argument print(...) works on both Python 2 and Python 3.
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission with associated records___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir,
                                      self.valid_file_with_associated_records)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        # Print before asserting so the messages are visible when the test
        # fails.
        self.validator.print_errors(valid_sub_yaml)

        self.assertTrue(is_valid)
        self.assertFalse(self.validator.has_errors(valid_sub_yaml))
Пример #3
0
    def test_ioerror_submission_yaml(self):
        """Validating a path with a truncated file name must return False.

        The path is the valid file's name with its last character removed
        (presumably a file that does not exist).
        """
        banner = ('___SUBMISSION_FILE_VALIDATION: '
                  'Testing ioerror yaml submission___')
        print(banner)

        self.validator = None
        self.validator = SubmissionFileValidator()
        truncated_name = self.valid_file[:-1]
        invalid_sub_yaml = os.path.join(self.base_dir, truncated_name)

        outcome = self.validator.validate(file_path=invalid_sub_yaml)
        self.assertEqual(outcome, False)

        self.validator.print_errors(invalid_sub_yaml)
Пример #4
0
    def test_valid_submission_yaml_with_empty_section(self):
        """The 'empty' submission file is expected to validate successfully."""
        banner = ('___SUBMISSION_FILE_VALIDATION: Testing valid yaml '
                  'submission without main section___')
        print(banner)

        self.validator = None
        self.validator = SubmissionFileValidator()
        empty_section_path = os.path.join(self.base_dir,
                                          self.valid_empty_file)

        outcome = self.validator.validate(file_path=empty_section_path)
        self.assertEqual(outcome, True)
        self.validator.print_errors(empty_section_path)
Пример #5
0
def get_submission_validator(old_schema):
    """
    Build the validator used for submission.yaml files.

    :param old_schema: truthy when the submission.yaml uses schema version 0.1.0
    :return: a SubmissionFileValidator created with ``schema_version='0.1.0'``
        when ``old_schema`` is truthy, otherwise one created with no arguments
    """
    if not old_schema:
        return SubmissionFileValidator()
    return SubmissionFileValidator(schema_version='0.1.0')
Пример #6
0
    def test_valid_submission_yaml_with_license(self):
        """The submission file carrying a license is expected to validate.

        Messages are printed before the assertion so they show up on failure.
        """
        banner = ('___SUBMISSION_FILE_VALIDATION: '
                  'Testing valid yaml submission with license___')
        print(banner)

        self.validator = None
        self.validator = SubmissionFileValidator()
        license_yaml_path = os.path.join(self.base_dir,
                                         self.valid_license_file)

        validation_result = self.validator.validate(
            file_path=license_yaml_path)
        self.validator.print_errors(license_yaml_path)
        self.assertEqual(validation_result, True)
Пример #7
0
    def test_valid_submission_yaml(self):
        """Run the validator on the valid submission file using pre-loaded YAML.

        The YAML documents are parsed here and handed to the validator through
        its ``data`` argument; any recorded messages are printed afterwards.
        """
        # Single-argument print(...) works on both Python 2 and Python 3.
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_file)

        # safe_load_all needs no explicit Loader (recent PyYAML rejects
        # load_all without one) and does not construct arbitrary Python
        # objects. It is lazy, so validation runs while the file is open and
        # the ``with`` block then closes the handle.
        with open(valid_sub_yaml, 'r') as sub_yaml_file:
            sub_yaml_obj = yaml.safe_load_all(sub_yaml_file)
            self.validator.validate(file_path=valid_sub_yaml,
                                    data=sub_yaml_obj)
        self.validator.print_errors(valid_sub_yaml)
Пример #8
0
def test_invalid_schema_file():
    """Check that constructing a validator for a fudged version fails.

    A made-up version string is temporarily added to VALID_SCHEMA_VERSIONS;
    building a SubmissionFileValidator for it must raise a ValueError whose
    message contains "Invalid schema file".
    """
    fake_version = '0.9999.9999'
    # Fudge the schema versions constant so we can check the file check works
    VALID_SCHEMA_VERSIONS.append(fake_version)
    try:
        with pytest.raises(ValueError) as excinfo:
            # Only the raised error matters; the instance is not needed.
            SubmissionFileValidator(schema_version=fake_version)

        assert "Invalid schema file" in str(excinfo.value)
    finally:
        # Remove exactly the value added above rather than whatever happens
        # to be last in the list.
        VALID_SCHEMA_VERSIONS.remove(fake_version)
Пример #9
0
    def test_valid_submission_yaml_with_empty_section(self):
        """The 'empty' submission file is expected to validate successfully."""
        # Single-argument print(...) works on both Python 2 and Python 3.
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml '
              'submission without main section___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_empty_file)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        # Print before asserting so the messages are visible when the test
        # fails.
        self.validator.print_errors(valid_sub_yaml)
        self.assertEqual(is_valid, True)
Пример #10
0
    def test_valid_submission_yaml_with_associated_records(self):
        """Check that the submission with associated records validates cleanly.

        The file must validate and must leave no errors registered against
        its path.
        """
        # Single-argument print(...) works on both Python 2 and Python 3.
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission with associated records___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir,
                                      self.valid_file_with_associated_records)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        # Print before asserting so the messages are visible when the test
        # fails.
        self.validator.print_errors(valid_sub_yaml)

        self.assertTrue(is_valid)
        self.assertFalse(self.validator.has_errors(valid_sub_yaml))
Пример #11
0
    def test_invalid_syntax(self):
        """A file with invalid syntax must fail with one parsing message.

        Checks that validation returns a falsy result, that exactly one
        message is recorded (for this file and overall), that it starts with
        the parsing-problem text, and that clear_messages() empties the
        message store.
        """
        self.validator = SubmissionFileValidator()
        invalid_syntax_file = os.path.join(self.base_dir,
                                           self.invalid_syntax_file)

        self.assertFalse(
            self.validator.validate(file_path=invalid_syntax_file))

        self.assertTrue(self.validator.has_errors(invalid_syntax_file))
        # assertEqual reports the actual count on failure, unlike
        # assertTrue(len(...) == 1).
        self.assertEqual(
            len(self.validator.get_messages(invalid_syntax_file)), 1)
        self.validator.print_errors(invalid_syntax_file)
        for message in self.validator.get_messages(invalid_syntax_file):
            print(message.message)
            # startswith fails the assertion when the text is missing;
            # str.index would raise ValueError instead.
            self.assertTrue(
                message.message.startswith(
                    "There was a problem parsing the file."))

        self.assertEqual(len(self.validator.get_messages()), 1)
        self.validator.clear_messages()
        self.assertEqual(len(self.validator.get_messages()), 0)
Пример #12
0
    def test_invalid_submission_yaml(self):
        """The invalid submission file must be rejected (validate returns False)."""
        # Single-argument print(...) works on both Python 2 and Python 3.
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing invalid yaml submission___')

        self.validator = SubmissionFileValidator()
        invalid_sub_yaml = os.path.join(self.base_dir, self.invalid_file)

        is_valid = self.validator.validate(file_path=invalid_sub_yaml)
        # Print before asserting so the messages are visible when the test
        # fails.
        self.validator.print_errors(invalid_sub_yaml)
        self.assertEqual(is_valid, False)
Пример #13
0
    def test_valid_submission_yaml_with_license(self):
        """The submission file carrying a license is expected to validate."""
        # Single-argument print(...) works on both Python 2 and Python 3.
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing valid yaml submission with license___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir,
                                      self.valid_license_file)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        # Print before asserting so the messages are visible when the test
        # fails.
        self.validator.print_errors(valid_sub_yaml)
        self.assertEqual(is_valid, True)
Пример #14
0
    def test_invalid_syntax(self):
        """A file with invalid syntax must fail with one parsing message.

        Checks that validation returns a falsy result, that exactly one
        message is recorded (for this file and overall), that it starts with
        the parsing-problem text, and that clear_messages() empties the
        message store.
        """
        self.validator = SubmissionFileValidator()
        invalid_syntax_file = os.path.join(self.base_dir, self.invalid_syntax_file)

        self.assertFalse(self.validator.validate(file_path=invalid_syntax_file))

        self.assertTrue(self.validator.has_errors(invalid_syntax_file))
        # assertEqual reports the actual count on failure, unlike
        # assertTrue(len(...) == 1).
        self.assertEqual(len(self.validator.get_messages(invalid_syntax_file)), 1)
        self.validator.print_errors(invalid_syntax_file)
        for message in self.validator.get_messages(invalid_syntax_file):
            # Single-argument print(...) works on both Python 2 and Python 3.
            print(message.message)
            # startswith fails the assertion when the text is missing;
            # str.index would raise ValueError instead.
            self.assertTrue(message.message.startswith("There was a problem parsing the file."))

        self.assertEqual(len(self.validator.get_messages()), 1)
        self.validator.clear_messages()
        self.assertEqual(len(self.validator.get_messages()), 0)
Пример #15
0
def process_submission_directory(basepath,
                                 submission_file_path,
                                 recid,
                                 update=False,
                                 *args,
                                 **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    Errors are collected across all YAML documents and data files and
    returned together instead of stopping at the first problem.

    :param basepath: directory that the data files and additional resources
        named in submission.yaml are resolved against
    :param submission_file_path: path to the submission.yaml file, or None
        when the upload did not contain one
    :param recid: publication record id the submission belongs to
    :param update: forwarded to reserve_doi_for_hepsubmission; when true, a
        submission whose status is 'finished' is not reopened as a new version
    :return: dict mapping a file name to a list of
        ``{"level": ..., "message": ...}`` dicts. When no errors were
        collected, the value returned by package_submission is returned
        instead.
    """
    added_file_names = []
    errors = {}

    if submission_file_path is None:
        # return an error
        return {
            "submission.yaml": [{
                "level":
                "error",
                "message":
                "No submission.yaml file found in submission."
            }]
        }

    submission_file_validator = SubmissionFileValidator()
    is_valid_submission_file = submission_file_validator.validate(
        file_path=submission_file_path)

    if not is_valid_submission_file:
        errors = process_validation_errors_for_display(
            submission_file_validator.get_messages())
        submission_file_validator.clear_messages()
        return errors

    # Read all YAML documents up front so the file handle is closed even if
    # processing below raises.
    with open(submission_file_path, 'r') as submission_file:
        submission_processed = list(
            yaml.load_all(submission_file, Loader=Loader))

    # process file, extracting contents, and linking
    # the data record with the parent publication
    hepsubmission = get_latest_hepsubmission(publication_recid=recid)
    if hepsubmission is None:
        # The previous code dereferenced ``hepsubmission`` inside this branch
        # and therefore always crashed with an AttributeError. Report the
        # problem through the normal error channel instead.
        # NOTE(review): the old branch looked like an attempt to create a
        # fresh HEPSubmission here - confirm whether one should be created.
        errors["submission.yaml"] = [{
            "level":
            "error",
            "message":
            "No existing submission found for record {}.".format(recid)
        }]
        return errors

    # On a new upload, we reset the flag to notify reviewers
    hepsubmission.reviewers_notified = False

    # if it is finished and we receive an update,
    # then we need to reopen the submission to allow for revisions.
    if hepsubmission.overall_status == 'finished' and not update:
        # we create a new HEPSubmission object
        _rev_hepsubmission = HEPSubmission(
            publication_recid=recid,
            overall_status='todo',
            inspire_id=hepsubmission.inspire_id,
            coordinator=hepsubmission.coordinator,
            version=hepsubmission.version + 1)
        db.session.add(_rev_hepsubmission)
        hepsubmission = _rev_hepsubmission

    reserve_doi_for_hepsubmission(hepsubmission, update)

    # Despite its name, this flag is cleared once the general submission
    # document has been processed; last_updated is only refreshed below
    # while it is still set.
    no_general_submission_info = True

    data_file_validator = DataFileValidator()

    # Delete all data records associated with this submission.
    # Fixes problems with ordering where the table names are changed between uploads.
    # See https://github.com/HEPData/hepdata/issues/112
    # Side effect that reviews will be deleted between uploads.
    cleanup_submission(recid, hepsubmission.version, added_file_names)

    for yaml_document_index, yaml_document in enumerate(submission_processed):
        if not yaml_document:
            continue

        # Check for presence of local files given as additional_resources.
        if 'additional_resources' in yaml_document:
            for resource in yaml_document['additional_resources']:
                location = os.path.join(basepath, resource['location'])
                if not resource['location'].startswith(
                        ('http', '/resource/')):
                    if not os.path.isfile(location):
                        errors[resource['location']] = [{
                            "level":
                            "error",
                            "message":
                            "Missing 'additional_resources' file from uploaded archive."
                        }]
                    elif '/' in resource['location']:
                        errors[resource['location']] = [{
                            "level":
                            "error",
                            "message":
                            "Location of 'additional_resources' file should not contain '/'."
                        }]

        if not yaml_document_index and 'name' not in yaml_document:
            # First document without a 'name': the general submission info.
            no_general_submission_info = False
            process_general_submission_info(basepath, yaml_document, recid)
            continue

        if not all(k in yaml_document
                   for k in ('name', 'description', 'keywords',
                             'data_file')):
            errors["submission.yaml"] = [{
                "level":
                "error",
                "message":
                "YAML document with index {} ".format(yaml_document_index) +
                "missing one or more required keys (name, description, keywords, data_file)."
            }]
            continue

        existing_datasubmission_query = DataSubmission.query \
            .filter_by(name=encode_string(yaml_document["name"]),
                       publication_recid=recid,
                       version=hepsubmission.version)

        added_file_names.append(yaml_document["name"])

        try:
            if existing_datasubmission_query.count() == 0:
                datasubmission = DataSubmission(
                    publication_recid=recid,
                    name=encode_string(yaml_document["name"]),
                    description=encode_string(yaml_document["description"]),
                    version=hepsubmission.version)
            else:
                datasubmission = existing_datasubmission_query.one()
                datasubmission.description = encode_string(
                    yaml_document["description"])
            db.session.add(datasubmission)
        except SQLAlchemyError as sqlex:
            errors[yaml_document["data_file"]] = [{
                "level": "error",
                "message": str(sqlex)
            }]
            db.session.rollback()
            continue

        main_file_path = os.path.join(basepath, yaml_document["data_file"])

        data, ex = _eos_fix_read_data(main_file_path)

        if not data or ex is not None:
            errors[yaml_document["data_file"]] = \
                [{"level": "error", "message": "There was a problem parsing the file.\n" + str(ex)}]
            continue

        if '/' in yaml_document["data_file"]:
            errors[yaml_document["data_file"]] = \
                [{"level": "error", "message": "Name of data_file should not contain '/'.\n"}]
            continue

        if data_file_validator.validate(file_path=main_file_path, data=data):
            try:
                process_data_file(recid, hepsubmission.version, basepath,
                                  yaml_document, datasubmission,
                                  main_file_path)
            except SQLAlchemyError as sqlex:
                errors[yaml_document["data_file"]] = [{
                    "level":
                    "error",
                    "message":
                    "There was a problem processing the file.\n" + str(sqlex)
                }]
                db.session.rollback()
        else:
            # Merge rather than replace, so errors already collected for
            # other documents/files are not thrown away.
            errors.update(
                process_validation_errors_for_display(
                    data_file_validator.get_messages()))
            data_file_validator.clear_messages()

        if yaml_document["data_file"] not in errors:
            # Check that the length of the 'values' list is consistent
            # for each of the independent_variables and dependent_variables.
            indep_count = [
                len(indep['values'])
                for indep in data['independent_variables']
            ]
            dep_count = [
                len(dep['values']) for dep in data['dependent_variables']
            ]
            if len(set(indep_count + dep_count)) > 1:  # if more than one unique count
                errors.setdefault(yaml_document["data_file"], []).append({
                    "level":
                    "error",
                    "message":
                    "Inconsistent length of 'values' list:\n" +
                    "independent_variables{}, dependent_variables{}".format(
                        str(indep_count), str(dep_count))
                })

    if no_general_submission_info:
        hepsubmission.last_updated = datetime.now()
        db.session.add(hepsubmission)
        db.session.commit()

    # Tables are not cleaned up selectively here since that does not preserve
    # the order of tables; all tables are deleted above instead (side effect
    # of deleting reviews between uploads).

    db.session.commit()

    if not errors:
        errors = package_submission(basepath, recid, hepsubmission)
        reserve_dois_for_data_submissions(publication_recid=recid,
                                          version=hepsubmission.version)

        admin_indexer = AdminIndexer()
        admin_indexer.index_submission(hepsubmission)

    else:  # delete all tables if errors
        cleanup_submission(recid, hepsubmission.version, {})

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors
Пример #16
0
 def test_no_file_path_supplied(self):
     """Call validate() with ``file_path=None`` and tolerate a LookupError.

     Note that this test also passes when no exception is raised at all;
     only exceptions other than LookupError make it fail.
     """
     checker = SubmissionFileValidator()
     self.validator = checker
     try:
         checker.validate(file_path=None)
     except LookupError as lookup_error:
         assert lookup_error
Пример #17
0
class SubmissionFileValidationTest(unittest.TestCase):
    """Tests for SubmissionFileValidator against the files under test_data/."""

    validator = None

    def setUp(self):
        """Record the test directory and the relative paths of the fixtures."""
        self.base_dir = os.path.dirname(os.path.realpath(__file__))

        self.valid_license_file = 'test_data/valid_submission_license.yaml'
        self.valid_file = 'test_data/valid_submission.yaml'
        self.valid_file_with_associated_records = 'test_data/valid_submission_with_associated_record.yaml'
        self.valid_empty_file = 'test_data/valid_submission_empty.yaml'
        self.invalid_file = 'test_data/invalid_submission.yaml'
        self.invalid_syntax_file = 'test_data/invalid_syntax_submission.yaml'
        self.invalid_parser_file = 'test_data/invalid_parser_submission.yaml'

    def test_valid_submission_yaml(self):
        """Run the validator on the valid file using pre-loaded YAML documents."""
        print(
            '___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_file)

        # yaml.load_all returns a lazy generator, so the documents have to be
        # consumed while the file is still open. The ``with`` block closes the
        # handle afterwards instead of leaking it.
        with open(valid_sub_yaml, 'r') as sub_yaml_file:
            sub_yaml_obj = yaml.load_all(sub_yaml_file, Loader=Loader)
            self.validator.validate(file_path=valid_sub_yaml,
                                    data=sub_yaml_obj)
        self.validator.print_errors(valid_sub_yaml)

    def test_no_file_path_supplied(self):
        """Call validate() with ``file_path=None`` and tolerate a LookupError.

        Note that this test also passes when no exception is raised at all.
        """
        self.validator = SubmissionFileValidator()
        try:
            self.validator.validate(file_path=None)
        except LookupError as le:
            assert (le)

    def test_invalid_syntax(self):
        """A file with invalid syntax must fail with one parsing message."""
        self.validator = SubmissionFileValidator()
        invalid_syntax_file = os.path.join(self.base_dir,
                                           self.invalid_syntax_file)

        self.assertFalse(
            self.validator.validate(file_path=invalid_syntax_file))

        self.assertTrue(self.validator.has_errors(invalid_syntax_file))
        # assertEqual reports the actual count on failure, unlike
        # assertTrue(len(...) == 1).
        self.assertEqual(
            len(self.validator.get_messages(invalid_syntax_file)), 1)
        self.validator.print_errors(invalid_syntax_file)
        for message in self.validator.get_messages(invalid_syntax_file):
            print(message.message)
            # startswith fails the assertion when the text is missing;
            # str.index would raise ValueError instead.
            self.assertTrue(
                message.message.startswith(
                    "There was a problem parsing the file."))

        self.assertEqual(len(self.validator.get_messages()), 1)
        self.validator.clear_messages()
        self.assertEqual(len(self.validator.get_messages()), 0)

    def test_valid_submission_yaml_with_associated_records(self):
        """The submission with associated records must validate without errors."""
        print(
            '___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission with associated records___'
        )

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir,
                                      self.valid_file_with_associated_records)
        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        self.validator.print_errors(valid_sub_yaml)

        self.assertTrue(is_valid)
        self.assertFalse(self.validator.has_errors(valid_sub_yaml))

    def test_valid_submission_yaml_with_empty_section(self):
        """The 'empty' submission file is expected to validate successfully."""
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml '
              'submission without main section___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_empty_file)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        # Print before asserting so the messages are visible on failure.
        self.validator.print_errors(valid_sub_yaml)
        self.assertEqual(is_valid, True)

    def test_valid_submission_yaml_with_license(self):
        """The submission file carrying a license is expected to validate."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing valid yaml submission with license___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_license_file)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        self.validator.print_errors(valid_sub_yaml)
        self.assertEqual(is_valid, True)

    def test_invalid_submission_yaml(self):
        """The invalid submission file must be rejected (validate returns False)."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing invalid yaml submission___')

        self.validator = SubmissionFileValidator()
        invalid_sub_yaml = os.path.join(self.base_dir, self.invalid_file)

        is_valid = self.validator.validate(file_path=invalid_sub_yaml)
        # Print before asserting so the messages are visible on failure.
        self.validator.print_errors(invalid_sub_yaml)
        self.assertEqual(is_valid, False)

    def test_invalid_parser_submission_yaml(self):
        """The 'invalid parser' submission file must be rejected."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing invalid parser yaml submission___')

        self.validator = SubmissionFileValidator()
        invalid_sub_yaml = os.path.join(self.base_dir,
                                        self.invalid_parser_file)

        is_valid = self.validator.validate(file_path=invalid_sub_yaml)
        # Print before asserting so the messages are visible on failure.
        self.validator.print_errors(invalid_sub_yaml)
        self.assertEqual(is_valid, False)

    def test_ioerror_submission_yaml(self):
        """Validating a path with a truncated file name must return False."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing ioerror yaml submission___')

        self.validator = SubmissionFileValidator()
        # Dropping the last character of the file name yields a path that
        # presumably does not exist.
        invalid_sub_yaml = os.path.join(self.base_dir, self.valid_file[:-1])

        is_valid = self.validator.validate(file_path=invalid_sub_yaml)
        # Print before asserting so the messages are visible on failure.
        self.validator.print_errors(invalid_sub_yaml)
        self.assertEqual(is_valid, False)
Пример #18
0
from hepdata_validator.submission_file_validator import SubmissionFileValidator

# Location of the submission file to check.
submission_file_path = 'submission/submission.yaml'
submission_file_validator = SubmissionFileValidator()

# validate() is given the file path as a string; its result is kept in
# is_valid_submission_file.
is_valid_submission_file = submission_file_validator.validate(
    file_path=submission_file_path)

# Messages recorded during validation can be fetched with get_messages().
submission_file_validator.get_messages()

# They can also be printed for a given file.
submission_file_validator.print_errors(submission_file_path)
Пример #19
0
                    '/', '-') + '.yaml'
                doc['data_file'] = file_name
                with open(file_name, 'w') as data_file:
                    yaml.dump(
                        {
                            'independent_variables':
                            doc.pop('independent_variables', None),
                            'dependent_variables':
                            doc.pop('dependent_variables', None)
                        },
                        data_file,
                        Dumper=Dumper)

    # Validate the submission.yaml file if validator imported.
    if validator_imported:
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path, data=docs)
        if not is_valid_submission_file:
            print('%s is invalid HEPData YAML.' % submission_file_path)
            submission_file_validator.print_errors(submission_file_path)
            quit()
        else:
            print('%s is valid HEPData YAML.' % submission_file_path)

    # Loop over all YAML documents in the submission.yaml file.
    for doc in docs:

        # Skip empty YAML documents.
        if not doc:
            continue
Пример #20
0
class SubmissionFileValidationTest(unittest.TestCase):
    """Tests for SubmissionFileValidator against the files under test_data/.

    print is always called with a single parenthesised argument so the module
    parses and behaves the same on Python 2 and Python 3.
    """

    validator = None

    def setUp(self):
        """Record the test directory and the relative paths of the fixtures."""
        self.base_dir = os.path.dirname(os.path.realpath(__file__))

        self.valid_license_file = 'test_data/valid_submission_license.yaml'
        self.valid_file = 'test_data/valid_submission.yaml'
        self.valid_file_with_associated_records = 'test_data/valid_submission_with_associated_record.yaml'
        self.valid_empty_file = 'test_data/valid_submission_empty.yaml'
        self.invalid_file = 'test_data/invalid_submission.yaml'
        self.invalid_syntax_file = 'test_data/invalid_syntax_submission.yaml'

    def test_valid_submission_yaml(self):
        """Run the validator on the valid file using pre-loaded YAML documents."""
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_file)

        # safe_load_all needs no explicit Loader (recent PyYAML rejects
        # load_all without one) and does not construct arbitrary Python
        # objects. It is lazy, so validation runs while the file is open and
        # the ``with`` block then closes the handle.
        with open(valid_sub_yaml, 'r') as sub_yaml_file:
            sub_yaml_obj = yaml.safe_load_all(sub_yaml_file)
            self.validator.validate(file_path=valid_sub_yaml,
                                    data=sub_yaml_obj)
        self.validator.print_errors(valid_sub_yaml)

    def test_no_file_path_supplied(self):
        """Call validate() with ``file_path=None`` and tolerate a LookupError.

        Note that this test also passes when no exception is raised at all.
        """
        self.validator = SubmissionFileValidator()
        try:
            self.validator.validate(file_path=None)
        except LookupError as le:
            assert (le)

    def test_invalid_syntax(self):
        """A file with invalid syntax must fail with one parsing message."""
        self.validator = SubmissionFileValidator()
        invalid_syntax_file = os.path.join(self.base_dir, self.invalid_syntax_file)

        self.assertFalse(self.validator.validate(file_path=invalid_syntax_file))

        self.assertTrue(self.validator.has_errors(invalid_syntax_file))
        # assertEqual reports the actual count on failure, unlike
        # assertTrue(len(...) == 1).
        self.assertEqual(len(self.validator.get_messages(invalid_syntax_file)), 1)
        self.validator.print_errors(invalid_syntax_file)
        for message in self.validator.get_messages(invalid_syntax_file):
            print(message.message)
            # startswith fails the assertion when the text is missing;
            # str.index would raise ValueError instead.
            self.assertTrue(message.message.startswith("There was a problem parsing the file."))

        self.assertEqual(len(self.validator.get_messages()), 1)
        self.validator.clear_messages()
        self.assertEqual(len(self.validator.get_messages()), 0)

    def test_valid_submission_yaml_with_associated_records(self):
        """The submission with associated records must validate without errors."""
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission with associated records___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_file_with_associated_records)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        # Print before asserting so the messages are visible on failure.
        self.validator.print_errors(valid_sub_yaml)

        self.assertTrue(is_valid)
        self.assertFalse(self.validator.has_errors(valid_sub_yaml))

    def test_valid_submission_yaml_with_empty_section(self):
        """The 'empty' submission file is expected to validate successfully."""
        print('___SUBMISSION_FILE_VALIDATION: Testing valid yaml '
              'submission without main section___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir, self.valid_empty_file)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        # Print before asserting so the messages are visible on failure.
        self.validator.print_errors(valid_sub_yaml)
        self.assertEqual(is_valid, True)

    def test_valid_submission_yaml_with_license(self):
        """The submission file carrying a license is expected to validate."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing valid yaml submission with license___')

        self.validator = SubmissionFileValidator()
        valid_sub_yaml = os.path.join(self.base_dir,
                                      self.valid_license_file)

        is_valid = self.validator.validate(file_path=valid_sub_yaml)
        # Print before asserting so the messages are visible on failure.
        self.validator.print_errors(valid_sub_yaml)
        self.assertEqual(is_valid, True)

    def test_invalid_submission_yaml(self):
        """The invalid submission file must be rejected (validate returns False)."""
        print('___SUBMISSION_FILE_VALIDATION: '
              'Testing invalid yaml submission___')

        self.validator = SubmissionFileValidator()
        invalid_sub_yaml = os.path.join(self.base_dir, self.invalid_file)

        is_valid = self.validator.validate(file_path=invalid_sub_yaml)
        # Print before asserting so the messages are visible on failure.
        self.validator.print_errors(invalid_sub_yaml)
        self.assertEqual(is_valid, False)
Пример #21
0
    def parse(self, data_in, *args, **kwargs):
        """
        Parse a submission file and the table data files it references.

        Both the submission file and every table file are validated with the
        schema version held in ``self.validator_schema_version``.

        :param data_in: path to submission.yaml, or to a directory containing
            submission.yaml or submission.yml
        :param args: not used by this method
        :param kwargs: not used by this method
        :return: ParsedData built from the submission-wide metadata document
            and the list of parsed tables
        :raise ValueError: if ``data_in`` does not exist, if a directory holds
            no submission file, or if a referenced table file is missing
        :raise RuntimeError: if the submission file is empty, or if the
            submission file or a table file fails validation
        """
        if not os.path.exists(data_in):
            raise ValueError("File / Directory does not exist: %s" % data_in)

        if os.path.isdir(data_in):
            submission_filepath = os.path.join(data_in, 'submission.yaml')
            if not os.path.exists(submission_filepath):
                submission_filepath = os.path.join(data_in, 'submission.yml')
                if not os.path.exists(submission_filepath):
                    raise ValueError("No submission file in %s" % data_in)
            data_in = submission_filepath

        # first validate submission file:
        with open(data_in, 'r') as submission_file:
            submission_data = list(
                yaml.load_all(submission_file, Loader=Loader))

        if not submission_data:
            raise RuntimeError("Submission file (%s) is empty" % data_in)

        submission_file_validator = SubmissionFileValidator(
            schema_version=self.validator_schema_version)
        if not submission_file_validator.validate(file_path=data_in,
                                                  data=submission_data):
            raise RuntimeError(
                "Submission file (%s) did not pass validation: %s" %
                (data_in,
                 self._pretty_print_errors(
                     submission_file_validator.get_messages())))

        metadata = {}
        tables = []

        # validator for table data
        data_file_validator = DataFileValidator(
            schema_version=self.validator_schema_version)

        submission_dir = os.path.dirname(data_in)
        index = 0
        for document in submission_data:
            if not document:  # empty YAML document
                continue
            if 'data_file' not in document:
                metadata = document  # information about whole submission
                continue

            table_filepath = os.path.join(submission_dir,
                                          document['data_file'])
            # Check before opening: the original test sat inside the ``with``
            # block, where open() had already failed for a missing file, and
            # it referenced ``table`` before any table had been created.
            if not os.path.exists(table_filepath):
                raise ValueError("table file: %s does not exist" %
                                 table_filepath)

            with open(table_filepath, 'r') as table_file:
                table_data = yaml.load(table_file, Loader=Loader)

            if not data_file_validator.validate(data=table_data,
                                                file_path=table_filepath):
                raise RuntimeError(
                    "Data file (%s) did not pass validation: %s" %
                    (table_filepath,
                     self._pretty_print_errors(
                         data_file_validator.get_messages())))

            # Tables are numbered from 1 in the order they appear.
            index += 1
            tables.append(
                Table(index=index, metadata=document, data=table_data))

        return ParsedData(metadata, tables)
Пример #22
0
def process_submission_directory(basepath,
                                 submission_file_path,
                                 recid,
                                 update=False,
                                 *args,
                                 **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath: directory the ``data_file`` entries of the submission
        file are joined onto
    :param submission_file_path: path to the submission file, or None when
        the upload did not contain one
    :param recid: publication record id the submission belongs to
    :param update: forwarded to ``reserve_doi_for_hepsubmission``; a
        'finished' submission is only re-opened as a new version when this
        is falsy
    :param kwargs: an optional ``user_id`` is used as coordinator when no
        HEPSubmission exists yet for ``recid``
    :return: the validation errors collected while processing; an empty
        dict when nothing failed validation
    """
    if submission_file_path is None:
        # return an error
        return {
            "submission.yaml": [{
                "level":
                "error",
                "message":
                "No submission.yaml file found in submission."
            }]
        }

    added_file_names = []
    errors = {}

    # yaml.load_all() parses lazily, so the file has to stay open until the
    # loop over the documents is done; the ``with`` block closes it afterwards.
    with open(submission_file_path, 'r') as submission_file:
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        data_file_validator = DataFileValidator()

        if not is_valid_submission_file:
            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())

            submission_file_validator.clear_messages()
            data_file_validator.clear_messages()
            return errors

        try:
            submission_processed = yaml.load_all(submission_file,
                                                 Loader=yaml.CSafeLoader)
        except AttributeError:
            # PyYAML without the libyaml bindings has no CSafeLoader.
            submission_processed = yaml.safe_load_all(submission_file)

        # process file, extracting contents, and linking
        # the data record with the parent publication
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
        if hepsubmission is None:
            # There is no earlier submission to copy inspire_id from, so it
            # is left unset and numbering starts at version 1.
            # NOTE(review): confirm these defaults for a first submission.
            if 'user_id' in kwargs:
                coordinator = kwargs.get('user_id')
            else:
                coordinator = int(current_user.get_id())
            hepsubmission = HEPSubmission(publication_recid=recid,
                                          overall_status='todo',
                                          coordinator=coordinator,
                                          version=1)
            db.session.add(hepsubmission)

        # On a new upload, we reset the flag to notify reviewers
        hepsubmission.reviewers_notified = False

        # if it is finished and we receive an update,
        # then we need to reopen the submission to allow for revisions.
        if hepsubmission.overall_status == 'finished' and not update:
            # we create a new HEPSubmission object
            _rev_hepsubmission = HEPSubmission(
                publication_recid=recid,
                overall_status='todo',
                inspire_id=hepsubmission.inspire_id,
                coordinator=hepsubmission.coordinator,
                version=hepsubmission.version + 1)
            db.session.add(_rev_hepsubmission)
            hepsubmission = _rev_hepsubmission

        reserve_doi_for_hepsubmission(hepsubmission, update)

        for yaml_document in submission_processed:
            if not yaml_document:  # empty YAML document
                continue

            if ('record_ids' in yaml_document
                    or 'comment' in yaml_document
                    or 'modifications' in yaml_document):
                # comments are only present in the general submission
                # information document.
                process_general_submission_info(basepath, yaml_document,
                                                recid)
                continue

            existing_datasubmission_query = DataSubmission.query \
                .filter_by(name=encode_string(yaml_document["name"]),
                           publication_recid=recid,
                           version=hepsubmission.version)

            added_file_names.append(yaml_document["name"])

            if existing_datasubmission_query.count() == 0:
                datasubmission = DataSubmission(
                    publication_recid=recid,
                    name=encode_string(yaml_document["name"]),
                    description=encode_string(
                        yaml_document["description"]),
                    version=hepsubmission.version)
            else:
                datasubmission = existing_datasubmission_query.one()
                datasubmission.description = encode_string(
                    yaml_document["description"])

            db.session.add(datasubmission)

            main_file_path = os.path.join(basepath,
                                          yaml_document["data_file"])

            if data_file_validator.validate(file_path=main_file_path):
                process_data_file(recid, hepsubmission.version,
                                  basepath, yaml_document,
                                  datasubmission, main_file_path)
            else:
                # NOTE(review): every failing data file replaces the errors
                # of the previous one; if the helper returns a dict keyed by
                # file name, errors.update(...) would keep them all - confirm.
                errors = process_validation_errors_for_display(
                    data_file_validator.get_messages())

                data_file_validator.clear_messages()

    cleanup_submission(recid, hepsubmission.version, added_file_names)

    db.session.commit()

    # Only package and index the submission when every data file validated.
    if not errors:
        package_submission(basepath, recid, hepsubmission)
        reserve_dois_for_data_submissions(recid, hepsubmission.version)

        admin_indexer = AdminIndexer()
        admin_indexer.index_submission(hepsubmission)

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors
Пример #23
0
def test_invalid_schema_version():
    """Constructing a validator with an unknown schema version raises ValueError."""
    bad_version = '0.9999.99'

    with pytest.raises(ValueError) as raised:
        SubmissionFileValidator(schema_version=bad_version)

    message = str(raised.value)
    assert message == "Invalid schema version 0.9999.99"
Пример #24
0
def validator_v1():
    """Return a SubmissionFileValidator created for schema version 1.0.1."""
    schema_version = '1.0.1'
    return SubmissionFileValidator(schema_version=schema_version)
Пример #25
0
def validator_v0():
    """Return a SubmissionFileValidator created for schema version 0.1.0."""
    schema_version = '0.1.0'
    return SubmissionFileValidator(schema_version=schema_version)
Пример #26
0
 def test_no_file_path_supplied(self):
     """Call validate() with ``file_path=None``; a LookupError is tolerated."""
     fresh_validator = SubmissionFileValidator()
     self.validator = fresh_validator
     try:
         fresh_validator.validate(file_path=None)
     except LookupError as lookup_error:
         # NOTE(review): nothing fails here when no exception is raised at
         # all, so this test cannot detect a missing LookupError.
         assert lookup_error
Пример #27
0
    def parse(self, data_in, *args, **kwargs):
        """
        Load and validate a submission file together with the table files
        it references.

        :param data_in: path to submission.yaml, or to a directory holding
            submission.yaml / submission.yml
        :param args: unused
        :param kwargs: unused
        :return: ParsedData built from the first YAML document of the
            submission file and one Table per following document
        :raise ValueError: if ``data_in`` does not exist, a directory holds
            no submission file, or a referenced table file does not exist
        :raise RuntimeError: if the submission file is empty, or the
            submission file or a table file does not pass validation
        :raise yaml.YAMLError: if a file is not well-formed YAML
        """
        if not os.path.exists(data_in):
            raise ValueError("File / Directory does not exist: %s" % data_in)

        if os.path.isdir(data_in):
            submission_filepath = os.path.join(data_in, 'submission.yaml')
            if not os.path.exists(submission_filepath):
                submission_filepath = os.path.join(data_in, 'submission.yml')
                if not os.path.exists(submission_filepath):
                    raise ValueError("No submission file in %s" % data_in)
            data_in = submission_filepath

        # Use the libyaml-backed loader for speed when PyYAML provides it and
        # the pure-Python safe loader otherwise. Choosing the loader up front
        # avoids re-reading a stream that a failed first attempt has already
        # consumed.
        yaml_loader = getattr(yaml, 'CSafeLoader', yaml.SafeLoader)

        # first validate submission file:
        with open(data_in, 'r') as submission_file:
            submission_data = list(
                yaml.load_all(submission_file, Loader=yaml_loader))

        if len(submission_data) == 0:
            raise RuntimeError("Submission file (%s) is empty" % data_in)

        submission_file_validator = SubmissionFileValidator()
        if not submission_file_validator.validate(file_path=data_in,
                                                  data=submission_data):
            raise RuntimeError(
                "Submission file (%s) did not pass validation: %s" %
                (data_in, self._pretty_print_errors(
                    submission_file_validator.get_messages())))

        tables = []

        # validator for table data
        data_file_validator = DataFileValidator()

        base_dir = os.path.dirname(data_in)

        # Document 0 is the submission-wide metadata; every later document
        # is read as a table description with a 'data_file' entry.
        for i in range(1, len(submission_data)):
            table_filepath = os.path.join(base_dir,
                                          submission_data[i]['data_file'])

            # Check before opening: open() would raise first and the check
            # would never run.
            if not os.path.exists(table_filepath):
                raise ValueError(
                    "table file: %s does not exist" % table_filepath)

            with open(table_filepath, 'r') as table_file:
                table_data = yaml.load(table_file, Loader=yaml_loader)

            if not data_file_validator.validate(data=table_data,
                                                file_path=table_filepath):
                raise RuntimeError(
                    "Data file (%s) did not pass validation: %s" %
                    (table_filepath, self._pretty_print_errors(
                        data_file_validator.get_messages())))

            table = Table(index=i, metadata=submission_data[i],
                          data=table_data)
            tables.append(table)

        return ParsedData(submission_data[0], tables)
Пример #28
0
def process_submission_directory(basepath, submission_file_path, recid, update=False, *args, **kwargs):
    """
    Goes through an entire submission directory and processes the
    files within to create DataSubmissions
    with the files and related material attached as DataResources.

    :param basepath: directory the ``data_file`` entries of the submission
        file are joined onto
    :param submission_file_path: path to the submission file, or None when
        the upload did not contain one
    :param recid: publication record id the submission belongs to
    :param update: a 'finished' submission is only re-opened as a new
        version when this is falsy
    :param kwargs: an optional ``user_id`` is used as coordinator when no
        HEPSubmission exists yet for ``recid``
    :return: the validation errors collected while processing; an empty
        dict when nothing failed validation
    """
    if submission_file_path is None:
        # return an error
        return {"submission.yaml": [
            {"level": "error",
             "message": "No submission.yaml file found in submission."}
        ]}

    added_file_names = []
    errors = {}

    # yaml.load_all() parses lazily, so the file has to stay open until the
    # loop over the documents is done; the ``with`` block closes it afterwards.
    with open(submission_file_path, 'r') as submission_file:
        submission_file_validator = SubmissionFileValidator()
        is_valid_submission_file = submission_file_validator.validate(
            file_path=submission_file_path)

        data_file_validator = DataFileValidator()

        if not is_valid_submission_file:
            errors = process_validation_errors_for_display(
                submission_file_validator.get_messages())

            submission_file_validator.clear_messages()
            data_file_validator.clear_messages()
            return errors

        try:
            submission_processed = yaml.load_all(submission_file, Loader=yaml.CSafeLoader)
        except AttributeError:
            # PyYAML without the libyaml bindings has no CSafeLoader.
            submission_processed = yaml.safe_load_all(submission_file)

        # process file, extracting contents, and linking
        # the data record with the parent publication
        hepsubmission = get_latest_hepsubmission(publication_recid=recid)
        if hepsubmission is None:
            # There is no earlier submission to copy inspire_id from, so it
            # is left unset and numbering starts at version 1.
            # NOTE(review): confirm these defaults for a first submission.
            if 'user_id' in kwargs:
                coordinator = kwargs.get('user_id')
            else:
                coordinator = int(current_user.get_id())
            hepsubmission = HEPSubmission(publication_recid=recid,
                                          overall_status='todo',
                                          coordinator=coordinator,
                                          version=1)
            db.session.add(hepsubmission)

        # On a new upload, we reset the flag to notify reviewers
        hepsubmission.reviewers_notified = False

        # if it is finished and we receive an update,
        # then we need to reopen the submission to allow for revisions.
        if hepsubmission.overall_status == 'finished' and not update:
            # we create a new HEPSubmission object
            _rev_hepsubmission = HEPSubmission(publication_recid=recid,
                                               overall_status='todo',
                                               inspire_id=hepsubmission.inspire_id,
                                               coordinator=hepsubmission.coordinator,
                                               version=hepsubmission.version + 1)
            db.session.add(_rev_hepsubmission)
            hepsubmission = _rev_hepsubmission

        reserve_doi_for_hepsubmission(hepsubmission)

        for yaml_document in submission_processed:
            if not yaml_document:  # empty YAML document
                continue

            if ('record_ids' in yaml_document
                    or 'comment' in yaml_document
                    or 'modifications' in yaml_document):
                # comments are only present in the general submission
                # information document.
                process_general_submission_info(basepath, yaml_document, recid)
                continue

            existing_datasubmission_query = DataSubmission.query \
                .filter_by(name=encode_string(yaml_document["name"]),
                           publication_recid=recid,
                           version=hepsubmission.version)

            added_file_names.append(yaml_document["name"])

            if existing_datasubmission_query.count() == 0:
                datasubmission = DataSubmission(
                    publication_recid=recid,
                    name=encode_string(yaml_document["name"]),
                    description=encode_string(
                        yaml_document["description"]),
                    version=hepsubmission.version)
            else:
                datasubmission = existing_datasubmission_query.one()
                datasubmission.description = encode_string(
                    yaml_document["description"])

            db.session.add(datasubmission)

            main_file_path = os.path.join(basepath,
                                          yaml_document["data_file"])

            if data_file_validator.validate(file_path=main_file_path):
                process_data_file(recid, hepsubmission.version, basepath, yaml_document,
                                  datasubmission, main_file_path)
            else:
                # NOTE(review): every failing data file replaces the errors
                # of the previous one; if the helper returns a dict keyed by
                # file name, errors.update(...) would keep them all - confirm.
                errors = process_validation_errors_for_display(
                    data_file_validator.get_messages())

                data_file_validator.clear_messages()

    cleanup_submission(recid, hepsubmission.version,
                       added_file_names)

    db.session.commit()

    # Only package and index the submission when every data file validated.
    if not errors:
        package_submission(basepath, recid, hepsubmission)
        reserve_dois_for_data_submissions(recid, hepsubmission.version)

        admin_indexer = AdminIndexer()
        admin_indexer.index_submission(hepsubmission)

    # we return all the errors collectively.
    # This makes more sense than returning errors as
    # soon as problems are found on one file.
    return errors