Exemplo n.º 1
0
def register_doi(inspire_id, version):
    """Queue DOI generation for one submission.

    :param inspire_id: INSPIRE ID string; any "ins" substring is stripped
        before the ID is handed to the task.
    :param version: submission version forwarded to the task.
    :return: None. Nothing happens unless both arguments are truthy.
    """
    if not (inspire_id and version):
        return

    print('Generating for {0} version {1}'.format(inspire_id, version))
    numeric_id = inspire_id.replace("ins", "")
    generate_dois_for_submission.delay(inspire_id=numeric_id, version=version)
Exemplo n.º 2
0
def register_dois(inspire_ids, start_recid, end_recid):
    """Queue DOI generation for a record-ID range or a list of INSPIRE IDs.

    :param inspire_ids: either the literal string 'all' or a comma-separated
        string of INSPIRE IDs (any "ins" substring is stripped from each).
    :param start_recid: lower record ID, used only in 'all' mode.
    :param end_recid: upper record ID, used only in 'all' mode.
    :return: None.

    'all' mode needs both record IDs to be truthy. If either is missing,
    the string 'all' falls through and is treated as a single INSPIRE ID.
    """
    if inspire_ids == 'all' and start_recid and end_recid:
        print('Generating for *all* record IDs between {} and {}'.format(start_recid, end_recid))
        generate_dois_for_submission.delay(start_recid, end_recid, overall_status='finished')
        return

    if not inspire_ids:
        return

    # One task per requested INSPIRE ID.
    for raw_id in inspire_ids.split(','):
        print('Generating for {0}'.format(raw_id))
        numeric_id = raw_id.replace("ins", "")
        generate_dois_for_submission.delay(inspire_id=numeric_id)
Exemplo n.º 3
0
def register_dois(inspire_ids):
    """Queue DOI generation for a comma-separated list of INSPIRE IDs.

    :param inspire_ids: comma-separated string of INSPIRE IDs (any "ins"
        substring is stripped from each). If empty or None, a single task
        is dispatched with no arguments instead
        (presumably meaning "all submissions" -- confirm against the task).
    :return: None
    """
    if not inspire_ids:
        generate_dois_for_submission.delay()
        # Nothing to iterate over; returning here avoids looping over None.
        return

    # Dispatch one task per requested INSPIRE ID.
    for inspire_id in inspire_ids.split(','):
        print('Generating for {0}'.format(inspire_id))
        _cleaned_id = inspire_id.replace("ins", "")
        generate_dois_for_submission.delay(inspire_id=_cleaned_id)
Exemplo n.º 4
0
    def update_file(inspire_id,
                    recid,
                    force=False,
                    only_record_information=False,
                    send_email=False,
                    send_tweet=False,
                    convert=False):
        """Refresh a record's publication information and, if allowed, reload its files.

        :param inspire_id: INSPIRE ID string of the record.
        :param recid: publication record ID to update.
        :param force: allow the reload even if the stored submission is not
            older than the old-site timestamp.
        :param only_record_information: if True, skip the file reload and only
            reindex the record, dispatch DOI generation and optionally email.
        :param send_email: in record-information-only mode, call
            ``notify_publication_update``.
        :param send_tweet: forwarded to ``do_finalise``.
        :param convert: forwarded to ``do_finalise``.
        :return: None
        """
        migrator = Migrator()

        output_location, oldsite_last_updated = \
            migrator.prepare_files_for_submission(inspire_id, force_retrieval=True)
        if not output_location:
            log.error("Failed to load {0}".format(inspire_id))
            return

        updated_record_information, status = \
            migrator.retrieve_publication_information(inspire_id)
        if status != 'success':
            log.error("Failed to retrieve publication information for {0}".
                      format(inspire_id))
            return
        record_information = update_record(recid, updated_record_information)

        hep_submission = HEPSubmission.query.filter_by(publication_recid=recid).first()
        version_count = HEPSubmission.query.filter_by(publication_recid=recid).count()

        print('Old site last updated {}'.format(str(oldsite_last_updated)))
        print('New site last updated {}'.format(str(hep_submission.last_updated)))
        print('Coordinator ID is {}, version count is {}'.format(
            hep_submission.coordinator, version_count))

        # A reload is only allowed for single-version submissions owned by
        # coordinator 1 that are either stale or explicitly forced.
        stale_or_forced = hep_submission.last_updated < oldsite_last_updated or force
        allow_update = stale_or_forced and hep_submission.coordinator == 1 \
            and version_count == 1

        if only_record_information:
            index_record_ids([record_information["recid"]])
            numeric_id = inspire_id.replace("ins", "")
            # update metadata stored in DataCite
            generate_dois_for_submission.delay(inspire_id=numeric_id)
            if send_email:
                # send email to all participants
                notify_publication_update(hep_submission, record_information)
            return

        if not allow_update:
            print('Not updating record {}'.format(recid))
            return

        try:
            recid = migrator.load_submission(
                record_information,
                output_location,
                os.path.join(output_location, "submission.yaml"),
                update=True)
            print('Loaded record {}'.format(recid))

            if recid is not None:
                do_finalise(recid,
                            publication_record=record_information,
                            force_finalise=True,
                            send_tweet=send_tweet,
                            update=True,
                            convert=convert)
        except FailedSubmission as failure:
            log.error(failure.message)
            failure.print_errors()
            remove_submission(failure.record_id)
Exemplo n.º 5
0
def update_record_info(inspire_id, send_email=False):
    """Update publication information from INSPIRE for a specific record.

    The publication record and every data record of the latest submission
    version are compared with the freshly retrieved INSPIRE information and
    updated where any compared key differs.

    :param inspire_id: INSPIRE ID string; any "ins" substring is stripped.
    :param send_email: if True and the submission is finished, call
        ``notify_publication_update`` after a successful update.
    :return: one of the status strings 'Inspire ID is None',
        'No HEPData submission', 'Invalid Inspire ID', 'No update needed'
        or 'Success'.
    """

    if inspire_id is None:
        log.error("Inspire ID is None")
        return 'Inspire ID is None'

    inspire_id = inspire_id.replace("ins", "")

    hep_submission = get_latest_hepsubmission(inspire_id=inspire_id)
    if hep_submission is None:
        log.warning("Failed to retrieve HEPData submission for Inspire ID {0}".format(inspire_id))
        return 'No HEPData submission'

    publication_recid = hep_submission.publication_recid

    log.info("Updating recid {} with information from Inspire record {}".format(publication_recid, inspire_id))

    updated_inspire_record_information, status = get_inspire_record_information(inspire_id)

    if status != 'success':
        log.warning("Failed to retrieve publication information for Inspire record {0}".format(inspire_id))
        return 'Invalid Inspire ID'

    # Also need to update publication information for data records.
    data_submissions = DataSubmission.query.filter_by(
        publication_recid=publication_recid, version=hep_submission.version
    ).order_by(DataSubmission.id.asc())
    # The publication record comes first, followed by its data records.
    record_ids = [publication_recid]
    record_ids.extend(
        data_submission.associated_recid for data_submission in data_submissions
    )

    same_information = {}
    for index, recid in enumerate(record_ids):

        if index == 0:
            # The publication record receives the full INSPIRE information.
            updated_record_information = updated_inspire_record_information
        else:
            # Only update selected keys for data records.
            updated_record_information = {
                key: updated_inspire_record_information[key] for key in (
                    'authors', 'creation_date', 'journal_info', 'collaborations'
                )
            }

        record_information = get_record_by_id(recid)
        same_information[recid] = True
        for key, value in updated_record_information.items():
            if key not in record_information or record_information[key] != value:
                log.debug('For recid {}, key {} has new value {}'.format(recid, key, value))
                same_information[recid] = False
                # The first difference triggers a full update of this record.
                update_record(recid, updated_record_information)
                break
        log.info('For recid {}, information needs to be updated: {}'.format(recid, str(not(same_information[recid]))))

    if all(same_information.values()):
        return 'No update needed'

    if hep_submission.overall_status == 'finished':
        index_record_ids(record_ids)  # index for Elasticsearch
        # Pass the publication ID explicitly: the loop variable ``recid``
        # ends up holding the last data record ID, not the publication ID.
        push_data_keywords(pub_ids=[publication_recid])
        if not TESTING:
            generate_dois_for_submission.delay(inspire_id=inspire_id)  # update metadata stored in DataCite
        if send_email:
            record_information = get_record_by_id(publication_recid)
            notify_publication_update(hep_submission, record_information)   # send email to all participants

    return 'Success'
Exemplo n.º 6
0
def do_finalise(recid, publication_record=None, force_finalise=False,
                commit_message=None, send_tweet=False, update=False, convert=True,
                send_email=True):
    """
        Finalise the latest submission for a publication record: finalise each
        data submission, update and commit the publication record, mark the
        submission as finished, then reindex and trigger follow-up tasks.

        :param recid: publication record ID of the submission to finalise.
        :param publication_record: passed through to
               ``finalise_datasubmission`` for every data submission.
        :param force_finalise: if True, skip the check that the current user
               is the submission's coordinator.
        :param commit_message: if truthy, stored as a
               ``RecordVersionCommitMessage`` for this record and version.
        :param send_tweet: if True, call ``tweet`` for the finalised record.
        :param update: if True, existing data records are looked up and
               removed from the index even for a version 1 submission.
        :param convert: if True, dispatch ``convert_and_store`` for the
               'yaml', 'csv', 'yoda' and 'root' formats.
        :param send_email: if True, call ``send_finalised_email``.
        :return: JSON string with ``success``, ``recid``, ``data_count`` and
               ``generated_records`` on success; JSON string with
               ``success: False`` and ``errors`` when there is no submission
               or the permission check fails; None if ``NoResultFound`` is
               raised while updating the record.
    """
    print('Finalising record {}'.format(recid))

    hep_submission = get_latest_hepsubmission(publication_recid=recid)

    generated_record_ids = []
    # Proceed only if a submission exists and the caller is either forcing
    # the finalise or is the submission's coordinator.
    if hep_submission \
        and (force_finalise or hep_submission.coordinator == int(current_user.get_id())):

        submissions = DataSubmission.query.filter_by(
            publication_recid=recid,
            version=hep_submission.version).all()

        version = hep_submission.version

        # Maps data record title -> existing recid.
        existing_submissions = {}
        if hep_submission.version > 1 or update:
            # we need to determine which are the existing record ids.
            existing_data_records = get_records_matching_field(
                'related_publication', recid, doc_type=CFG_DATA_TYPE)

            for record in existing_data_records["hits"]["hits"]:

                if "recid" in record["_source"]:
                    existing_submissions[record["_source"]["title"]] = \
                        record["_source"]["recid"]
                    # Remove the old entry from the index; everything is
                    # reindexed further down.
                    delete_item_from_index(record["_id"],
                                           doc_type=CFG_DATA_TYPE, parent=record["_source"]["related_publication"])

        # Single timestamp shared by all data submissions finalised in this call.
        current_time = "{:%Y-%m-%d %H:%M:%S}".format(datetime.utcnow())

        for submission in submissions:
            # generated_record_ids is passed in here and reused for
            # reindexing below (presumably filled by the helper -- confirm).
            finalise_datasubmission(current_time, existing_submissions,
                                    generated_record_ids,
                                    publication_record, recid, submission,
                                    version)

        try:
            record = get_record_by_id(recid)
            record['hepdata_doi'] = hep_submission.doi

            # The last updated date will be the current date (if record not migrated from the old site).
            if hep_submission.coordinator > 1:
                hep_submission.last_updated = datetime.utcnow()

            # If we have a commit message, then we have a record update.
            # Store the commit message against this record and version.
            if commit_message:
                commit_record = RecordVersionCommitMessage(
                    recid=recid,
                    version=version,
                    message=str(commit_message))

                db.session.add(commit_record)

            record['last_updated'] = datetime.strftime(
                hep_submission.last_updated, '%Y-%m-%d %H:%M:%S')
            record['version'] = version

            record.commit()

            # Copy the INSPIRE ID from the record and mark the submission finished.
            hep_submission.inspire_id = record['inspire_id']
            hep_submission.overall_status = "finished"
            db.session.add(hep_submission)

            db.session.commit()

            create_celery_app(current_app)

            # only mint DOIs if not testing.
            if not current_app.config.get('TESTING', False):
                generate_dois_for_submission.delay(inspire_id=hep_submission.inspire_id, version=version)
                log.info("Generated DOIs for ins{0}".format(hep_submission.inspire_id))

            # Reindex everything.
            index_record_ids([recid] + generated_record_ids)
            push_data_keywords(pub_ids=[recid])

            # Admin indexing is best-effort: a connection timeout is logged
            # and does not abort the finalise.
            try:
                admin_indexer = AdminIndexer()
                admin_indexer.index_submission(hep_submission)
            except ConnectionTimeout as ct:
                log.error('Unable to add ins{0} to admin index.\n{1}'.format(hep_submission.inspire_id, ct))

            if send_email:
                send_finalised_email(hep_submission)

            if convert:
                # One conversion task per supported output format.
                for file_format in ['yaml', 'csv', 'yoda', 'root']:
                    convert_and_store.delay(hep_submission.inspire_id, file_format, force=True)

            if send_tweet:
                site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
                tweet(record.get('title'), record.get('collaborations'),
                      site_url + '/record/ins{0}'.format(record.get('inspire_id')), version)

            return json.dumps({"success": True, "recid": recid,
                               "data_count": len(submissions),
                               "generated_records": generated_record_ids})

        except NoResultFound:
            # NOTE(review): this path falls through and returns None rather
            # than a JSON string -- confirm callers handle that.
            print('No record found to update. Which is super strange.')

    else:
        # No submission found, or the current user is not the coordinator.
        return json.dumps(
            {"success": False, "recid": recid,
             "errors": ["You do not have permission to finalise this "
                        "submission. Only coordinators can do that."]})