def formdata_to_model(obj, formdata):
    """Manipulate form data to match the authors data model.

    :param obj: workflow object; ``obj.data`` may carry an existing
        ``$schema``, and ``obj.id_user`` / ``obj.id`` identify the submitter.
    :param formdata: raw form data (not mutated; a deep copy is used).
    :returns: dict shaped like the authors data model.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields, ['institution_history', 'advisors', 'websites', 'experiments'])
    data = updateform.do(form_fields)

    # ======
    # Schema
    # ======
    # Inherit the schema from the workflow object when the form did not set one.
    if '$schema' not in data and '$schema' in obj.data:
        data['$schema'] = obj.data.get('$schema')

    # Expand a bare schema name into a full schema URL.
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for('invenio_jsonschemas.get_schema',
                                  schema_path="records/{0}".format(
                                      data['$schema']))

    # Build the "Family, Given" preferred name from the form fields.
    author_name = ''
    if 'family_name' in form_fields and form_fields['family_name']:
        author_name = form_fields['family_name'].strip() + ', '
    if 'given_names' in form_fields and form_fields['given_names']:
        author_name += form_fields['given_names']

    if author_name:
        # FIX: was ``data.get('name', {})['value'] = ...``, which wrote into a
        # throwaway dict (silently losing the name) whenever 'name' was
        # missing; setdefault mutates the dict actually stored in ``data``.
        data.setdefault('name', {})['value'] = author_name

    # Add comments to extra data
    if 'extra_comments' in form_fields and form_fields['extra_comments']:
        data['_private_note'] = form_fields['extra_comments']

    # Add email to extra data
    if "public_email" in form_fields and form_fields["public_email"]:
        obj.extra_data["public_email"] = form_fields["public_email"]
        data["public_email"] = form_fields["public_email"]

    # Add HEPNAMES collection
    data["collections"] = [{"primary": "HEPNAMES"}]

    # ==========
    # Owner Info
    # ==========
    try:
        user_email = User.query.get(obj.id_user).email
    except AttributeError:
        # No matching user record: fall back to an empty email.
        user_email = ''

    source = "{0}{1}".format('inspire:uid:', obj.id_user)
    data['acquisition_source'] = dict(
        source=source,
        email=user_email,
        date=date.today().isoformat(),
        method="submission",
        submission_number=str(obj.id),
    )

    # Finally, set data
    return data
def formdata_to_model(obj, formdata):
    """Manipulate form data to match the authors data model.

    :param obj: workflow object providing ``obj.extra_data``,
        ``obj.id_user`` and ``obj.id``.
    :param formdata: raw form data (not mutated; a deep copy is used).
    :returns: dict shaped like the authors data model.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields,
        ['institution_history', 'advisors', 'websites', 'experiments']
    )
    data = updateform.do(form_fields)

    # ======
    # Schema
    # ======
    # Expand a bare schema name into a full schema URL.
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for(
            'invenio_jsonschemas.get_schema',
            schema_path="records/{0}".format(data['$schema'])
        )

    # Build the "Family, Given" preferred name from the form fields.
    author_name = ''
    if 'family_name' in form_fields and form_fields['family_name']:
        author_name = form_fields['family_name'].strip() + ', '
    if 'given_names' in form_fields and form_fields['given_names']:
        author_name += form_fields['given_names']

    if author_name:
        # FIX: was ``data.get('name', {})['value'] = ...``, which wrote into a
        # throwaway dict (silently losing the name) whenever 'name' was
        # missing; setdefault mutates the dict actually stored in ``data``.
        data.setdefault('name', {})['value'] = author_name

    # Add comments to extra data
    if "comments" in form_fields and form_fields["comments"]:
        obj.extra_data["comments"] = form_fields["comments"]
        data["_private_note"] = form_fields["comments"]

    # Add HEPNAMES collection
    data["collections"] = [{
        "primary": "HEPNAMES"
    }]

    # ==========
    # Owner Info
    # ==========
    try:
        user_email = User.query.get(obj.id_user).email
    except AttributeError:
        # No matching user record: fall back to an empty email.
        user_email = ''

    sources = ["{0}{1}".format('inspire:uid:', obj.id_user)]
    data['acquisition_source'] = dict(
        source=sources,
        email=user_email,
        date=date.today().isoformat(),
        method="submission",
        submission_number=str(obj.id),
    )

    # Finally, set data
    return data
def convert_data_to_model(obj, eng):
    """Manipulate form data to match author model keys.

    Workflow task: mutates ``obj.data`` in place and stores the original
    form data under ``obj.extra_data['formdata']``.

    :param obj: workflow object whose ``data`` holds the submitted form.
    :param eng: workflow engine (unused; required by the task signature).
    """
    # Save original form data for later access
    form_fields = copy.deepcopy(obj.data)
    obj.extra_data["formdata"] = copy.deepcopy(form_fields)

    filter_empty_elements(
        obj.data,
        ['institution_history', 'advisors', 'websites', 'experiments']
    )
    converted = updateform.do(obj.data)
    obj.data.update(converted)

    # Build the "Family, Given" preferred name from the form fields.
    author_name = ''
    if 'family_name' in form_fields and form_fields['family_name']:
        author_name = form_fields['family_name'].strip() + ', '
    if 'given_names' in form_fields and form_fields['given_names']:
        author_name += form_fields['given_names']

    if author_name:
        # FIX: was ``obj.data.get('name', {})['value'] = ...``, which wrote
        # into a throwaway dict (silently losing the name) whenever 'name'
        # was missing; setdefault mutates the dict actually stored in
        # ``obj.data``.
        obj.data.setdefault('name', {})['value'] = author_name

    # Add comments to extra data
    if "comments" in form_fields and form_fields["comments"]:
        obj.extra_data["comments"] = form_fields["comments"]
        obj.data["_private_note"] = form_fields["comments"]

    # Add HEPNAMES collection
    obj.data["collections"] = [{
        "primary": "HEPNAMES"
    }]

    # ==========
    # Owner Info
    # ==========
    try:
        user_email = User.query.get(obj.id_user).email
    except AttributeError:
        # No matching user record: fall back to an empty email.
        user_email = ''

    sources = ["{0}{1}".format('inspire:uid:', obj.id_user)]
    obj.data['acquisition_source'] = dict(
        source=sources,
        email=user_email,
        date=date.today().isoformat(),
        method="submission",
        submission_number=obj.id,
    )
def formdata_to_model(obj, formdata):
    """Manipulate form data to match the authors data model.

    :param obj: workflow object; ``obj.data`` may carry an existing
        ``$schema``, and ``obj.id_user`` / ``obj.id`` identify the submitter.
    :param formdata: raw form data (not mutated; a deep copy is used).
    :returns: validated dict shaped like the authors data model.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields,
        ['institution_history', 'advisors', 'websites', 'experiments']
    )
    data = updateform.do(form_fields)

    # ======
    # Schema
    # ======
    # Inherit the schema from the workflow object when the form did not set one.
    if '$schema' not in data and '$schema' in obj.data:
        data['$schema'] = obj.data.get('$schema')

    # Expand a bare schema name into a full schema URL.
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for(
            'invenio_jsonschemas.get_schema',
            schema_path="records/{0}".format(data['$schema'])
        )

    # Build the "Family, Given" preferred name from the form fields.
    author_name = ''
    if 'family_name' in form_fields and form_fields['family_name']:
        author_name = form_fields['family_name'].strip() + ', '
    if 'given_names' in form_fields and form_fields['given_names']:
        author_name += form_fields['given_names']

    if author_name:
        # FIX: was ``data.get('name', {})['value'] = ...``, which wrote into a
        # throwaway dict (silently losing the name) whenever 'name' was
        # missing; setdefault mutates the dict actually stored in ``data``.
        data.setdefault('name', {})['value'] = author_name

    # Add comments to extra data
    if 'extra_comments' in form_fields and form_fields['extra_comments']:
        data.setdefault('_private_notes', []).append({
            'source': 'submitter',
            'value': form_fields['extra_comments']
        })

    # Submitted records are full records, not stubs.
    data['stub'] = False

    # ==========
    # Submitter Info
    # ==========
    try:
        user_email = User.query.get(obj.id_user).email
    except AttributeError:
        # No matching user record: fall back to an empty email.
        user_email = ''
    try:
        orcid = UserIdentity.query.filter_by(
            id_user=obj.id_user,
            method='orcid'
        ).one().id
    except NoResultFound:
        # Submitter has no linked ORCID identity.
        orcid = ''

    data['acquisition_source'] = dict(
        email=user_email,
        datetime=datetime.datetime.utcnow().isoformat(),
        method="submitter",
        orcid=orcid,
        submission_number=str(obj.id),
        internal_uid=int(obj.id_user),
    )

    # FIX: the return value of strip_empty_values was discarded, so
    # validation and the returned record still contained empty values;
    # assign it back, as the other conversion variants do.
    data = strip_empty_values(data)
    validate(data, 'authors')
    return data
def formdata_to_model(obj, formdata):
    """Manipulate form data to match the literature data model.

    :param obj: workflow object; ``obj.extra_data`` receives the submitted
        PDF URL, and ``obj.id`` / ``obj.id_user`` identify the submission.
    :param formdata: raw form data (not mutated; a deep copy is used).
    :returns: the record built by :class:`LiteratureBuilder`.
    """
    def _is_arxiv_url(url):
        # Crude containment check; arXiv URLs are handled by harvesting, not here.
        return 'arxiv.org' in url

    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(form_fields,
                          ['authors', 'supervisors', 'report_numbers'])

    builder = LiteratureBuilder(source='submitter')

    for author in form_fields.get('authors', []):
        builder.add_author(
            builder.make_author(
                author['full_name'],
                affiliations=force_list(author['affiliation'])
                if author['affiliation'] else None,
                roles=['author']))

    for supervisor in form_fields.get('supervisors', []):
        # FIX: the affiliation guard tested ``author['affiliation']`` (a
        # leftover variable from the previous loop) instead of the
        # supervisor's own affiliation — wrong data, and a NameError when
        # there are supervisors but no authors.
        builder.add_author(
            builder.make_author(
                supervisor['full_name'],
                affiliations=force_list(supervisor['affiliation'])
                if supervisor['affiliation'] else None,
                roles=['supervisor']))

    builder.add_title(title=form_fields.get('title'))

    # A conference name forces the document type to 'conference paper'.
    document_type = 'conference paper' if form_fields.get('conf_name') \
        else form_fields.get('type_of_doc', [])
    if document_type == 'chapter':
        document_type = 'book chapter'
    builder.add_document_type(document_type=document_type)

    builder.add_abstract(
        abstract=form_fields.get('abstract'),
        source='arXiv' if form_fields.get('categories') else None)

    if form_fields.get('arxiv_id') and form_fields.get('categories'):
        builder.add_arxiv_eprint(
            arxiv_id=form_fields.get('arxiv_id'),
            arxiv_categories=form_fields.get('categories').split())

    builder.add_doi(doi=form_fields.get('doi'))

    builder.add_inspire_categories(
        subject_terms=form_fields.get('subject_term'),
        source='user')

    # Every hidden/internal field becomes a private note.
    for key in ('extra_comments', 'nonpublic_note',
                'hidden_notes', 'conf_name'):
        builder.add_private_note(private_notes=form_fields.get(key))

    year = form_fields.get('year')
    try:
        year = int(year)
    except (TypeError, ValueError):
        year = None

    builder.add_preprint_date(
        preprint_date=form_fields.get('preprint_created'))

    if form_fields.get('type_of_doc') == 'thesis':
        builder.add_thesis(defense_date=form_fields.get('defense_date'),
                           degree_type=form_fields.get('degree_type'),
                           institution=form_fields.get('institution'),
                           date=form_fields.get('thesis_date'))

    if form_fields.get('type_of_doc') == 'chapter':
        # A chapter published in a journal carries no separate series info.
        if not form_fields.get('journal_title'):
            builder.add_book_series(title=form_fields.get('series_title'))

    if form_fields.get('type_of_doc') == 'book':
        if form_fields.get('journal_title'):
            # Book published in a journal: the series volume is the
            # journal volume.
            form_fields['volume'] = form_fields.get('series_volume')
        else:
            builder.add_book_series(title=form_fields.get('series_title'),
                                    volume=form_fields.get('series_volume'))
        builder.add_book(publisher=form_fields.get('publisher_name'),
                         place=form_fields.get('publication_place'),
                         date=form_fields.get('publication_date'))

    builder.add_publication_info(
        year=year,
        cnum=form_fields.get('conference_id'),
        journal_issue=form_fields.get('issue'),
        journal_title=form_fields.get('journal_title'),
        journal_volume=form_fields.get('volume'),
        page_start=form_fields.get('start_page'),
        page_end=form_fields.get('end_page'),
        artid=form_fields.get('artid'),
        parent_record=form_fields.get('parent_book'))

    builder.add_accelerator_experiments_legacy_name(
        legacy_name=form_fields.get('experiment'))

    # 'oth' means the real language is in the free-text companion field.
    language = form_fields.get('other_language') \
        if form_fields.get('language') == 'oth' \
        else form_fields.get('language')
    builder.add_language(language=language)

    if form_fields.get('title_translation'):
        builder.add_title_translation(
            title=form_fields['title_translation'],
            language='en',
        )

    builder.add_title(title=form_fields.get('title_arXiv'), source='arXiv')
    builder.add_title(title=form_fields.get('title_crossref'),
                      source='crossref')
    builder.add_license(url=form_fields.get('license_url'))
    builder.add_public_note(public_note=form_fields.get('public_notes'))
    builder.add_public_note(
        public_note=form_fields.get('note'),
        source='arXiv' if form_fields.get('categories') else 'CrossRef')

    # Non-arXiv URLs: remember the PDF for later, and only record the main
    # URL when no additional one overrides it.
    form_url = form_fields.get('url')
    form_additional_url = form_fields.get('additional_url')
    if form_url and not _is_arxiv_url(form_url):
        obj.extra_data['submission_pdf'] = form_url
        if not form_additional_url:
            builder.add_url(url=form_url)

    if form_additional_url and not _is_arxiv_url(form_additional_url):
        builder.add_url(url=form_additional_url)

    # Plain loop instead of a side-effect-only list comprehension.
    for report_number in form_fields.get('report_numbers', []):
        builder.add_report_number(
            report_number=report_number.get('report_number'))

    builder.add_collaboration(collaboration=form_fields.get('collaboration'))

    builder.add_acquisition_source(
        datetime=datetime.datetime.utcnow().isoformat(),
        submission_number=obj.id,
        internal_uid=int(obj.id_user),
        email=form_fields.get('email'),
        orcid=form_fields.get('orcid'),
        method='submitter')

    return builder.record
def formdata_to_model(obj, formdata):
    """Manipulate form data to match the literature data model.

    :param obj: workflow object; ``obj.extra_data`` receives the submitted
        PDF URL, and ``obj.id`` / ``obj.id_user`` identify the submission.
    :param formdata: raw form data (not mutated; a deep copy is used).
    :returns: the validated record built by :class:`LiteratureBuilder`.
    """
    def _is_arxiv_url(url):
        # Crude containment check; arXiv URLs are handled by harvesting, not here.
        return 'arxiv.org' in url

    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields, ['authors', 'supervisors', 'report_numbers']
    )

    builder = LiteratureBuilder(source='submitter')

    for author in form_fields.get('authors', []):
        builder.add_author(builder.make_author(
            author['full_name'],
            affiliations=force_list(author['affiliation'])
            if author['affiliation'] else None,
            roles=['author']
        ))

    for supervisor in form_fields.get('supervisors', []):
        # FIX: the affiliation guard tested ``author['affiliation']`` (a
        # leftover variable from the previous loop) instead of the
        # supervisor's own affiliation — wrong data, and a NameError when
        # there are supervisors but no authors.
        builder.add_author(builder.make_author(
            supervisor['full_name'],
            affiliations=force_list(supervisor['affiliation'])
            if supervisor['affiliation'] else None,
            roles=['supervisor']
        ))

    builder.add_title(title=form_fields.get('title'))

    # A conference name forces the document type to 'conference paper'.
    document_type = 'conference paper' if form_fields.get('conf_name') \
        else form_fields.get('type_of_doc', [])
    builder.add_document_type(
        document_type=document_type
    )

    builder.add_abstract(
        abstract=form_fields.get('abstract'),
        source='arXiv' if form_fields.get('categories') else None
    )

    if form_fields.get('arxiv_id') and form_fields.get('categories'):
        builder.add_arxiv_eprint(
            arxiv_id=form_fields.get('arxiv_id'),
            arxiv_categories=form_fields.get('categories').split()
        )

    builder.add_doi(doi=form_fields.get('doi'))

    builder.add_inspire_categories(
        subject_terms=form_fields.get('subject_term'),
        source='user'
    )

    # Every hidden/internal field becomes a private note.
    for key in ('extra_comments', 'nonpublic_note',
                'hidden_notes', 'conf_name', 'references'):
        builder.add_private_note(
            private_notes=form_fields.get(key)
        )

    year = form_fields.get('year')
    try:
        year = int(year)
    except (TypeError, ValueError):
        year = None

    builder.add_publication_info(
        year=year,
        cnum=form_fields.get('conference_id'),
        journal_issue=form_fields.get('issue'),
        journal_title=form_fields.get('journal_title'),
        journal_volume=form_fields.get('volume'),
        page_start=form_fields.get('page_start'),
        page_end=form_fields.get('page_end'),
        artid=form_fields.get('artid')
    )

    builder.add_preprint_date(
        preprint_date=form_fields.get('preprint_created')
    )

    if form_fields.get('type_of_doc') == 'thesis':
        builder.add_thesis(
            defense_date=form_fields.get('defense_date'),
            degree_type=form_fields.get('degree_type'),
            institution=form_fields.get('institution'),
            date=form_fields.get('thesis_date')
        )

    builder.add_accelerator_experiments_legacy_name(
        legacy_name=form_fields.get('experiment')
    )

    # 'oth' means the real language is in the free-text companion field.
    language = form_fields.get('other_language') \
        if form_fields.get('language') == 'oth' \
        else form_fields.get('language')
    builder.add_language(language=language)

    builder.add_title_translation(title=form_fields.get('title_translation'))

    builder.add_title(
        title=form_fields.get('title_arXiv'),
        source='arXiv'
    )
    builder.add_title(
        title=form_fields.get('title_crossref'),
        source='crossref'
    )
    builder.add_license(url=form_fields.get('license_url'))
    builder.add_public_note(public_note=form_fields.get('public_notes'))
    builder.add_public_note(
        public_note=form_fields.get('note'),
        source='arXiv' if form_fields.get('categories') else 'CrossRef'
    )

    # Non-arXiv URLs: remember the PDF for later, and only record the main
    # URL when no additional one overrides it.
    form_url = form_fields.get('url')
    form_additional_url = form_fields.get('additional_url')
    if form_url and not _is_arxiv_url(form_url):
        obj.extra_data['submission_pdf'] = form_url
        if not form_additional_url:
            builder.add_url(url=form_url)

    if form_additional_url and not _is_arxiv_url(form_additional_url):
        builder.add_url(url=form_additional_url)

    # Plain loop instead of a side-effect-only list comprehension.
    for report_number in form_fields.get('report_numbers', []):
        builder.add_report_number(
            report_number=report_number.get('report_number')
        )

    builder.add_collaboration(collaboration=form_fields.get('collaboration'))

    builder.add_acquisition_source(
        datetime=datetime.datetime.utcnow().isoformat(),
        submission_number=obj.id,
        internal_uid=int(obj.id_user),
        email=form_fields.get('email'),
        orcid=form_fields.get('orcid'),
        method='submitter'
    )

    builder.validate_record()

    return builder.record
def formdata_to_model(obj, formdata):
    """Manipulate form data to match the literature data model.

    Converts the submitted form into a HEP record dict, derives collections,
    title sources, page count and acquisition info, and stashes
    references / extra comments / PDF under ``obj.extra_data['submission_data']``.

    :param obj: workflow object (``obj.extra_data`` is mutated;
        ``obj.id_user`` / ``obj.id`` identify the submitter).
    :param formdata: raw form data (not mutated; a deep copy is used).
    :returns: dict shaped like the literature (HEP) data model.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields,
        ['authors', 'supervisors', 'report_numbers']
    )
    obj.extra_data["submission_data"] = {}
    data = literature.do(form_fields)

    # Add extra fields that need to be computed or depend on other
    # fields.
    #
    # ======
    # Schema
    # ======
    # Expand a bare schema name into a full schema URL.
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for(
            'invenio_jsonschemas.get_schema',
            schema_path="records/{0}".format(data['$schema'])
        )

    # ============================
    # Collection
    # ============================
    data['collections'] = [{'primary': "HEP"}]
    if form_fields['type_of_doc'] == 'thesis':
        data['collections'].append({'primary': "THESIS"})
    if "field_categories" in data:
        # Check if it was imported from arXiv
        if any([x["scheme"] == "arXiv" for x in data["field_categories"]]):
            data['collections'].extend([{'primary': "arXiv"},
                                        {'primary': "Citeable"}])
            # Add arXiv as source
            if data.get("abstracts"):
                data['abstracts'][0]['source'] = 'arXiv'
            if form_fields.get("arxiv_id"):
                data['external_system_numbers'] = [{
                    'value': 'oai:arXiv.org:' + form_fields['arxiv_id'],
                    'institute': 'arXiv'
                }]
    if "publication_info" in data:
        # NOTE(review): assumes publication_info is a non-empty list here;
        # an empty list would raise IndexError — confirm upstream guarantees.
        if all([key in data['publication_info'][0].keys()
                for key in ('year', 'journal_issue', 'journal_volume',
                            'page_start', 'page_end', 'artid')]):
            # NOTE: Only peer reviewed journals should have this collection
            # we are adding it here but ideally should be manually added
            # by a curator.
            data['collections'].append({'primary': "Published"})
        # Add Citeable collection if not present
        collections = [x['primary'] for x in data['collections']]
        if "Citeable" not in collections:
            data['collections'].append({'primary': "Citeable"})

    # ============================
    # Title source and cleanup
    # ============================
    try:
        # Clean up all extra spaces in title
        data['titles'][0]['title'] = " ".join(
            data['titles'][0]['title'].split()
        )
        title = data['titles'][0]['title']
    except (KeyError, IndexError):
        title = ""

    # If the arXiv/CrossRef title matches the main one, it only fixes the
    # source; otherwise it is recorded as an extra title.
    if form_fields.get('title_arXiv'):
        title_arxiv = " ".join(form_fields.get('title_arXiv').split())
        if title == title_arxiv:
            data['titles'][0]["source"] = "arXiv"
        else:
            data['titles'].append({
                'title': title_arxiv,
                'source': "arXiv"
            })

    if form_fields.get('title_crossref'):
        title_crossref = " ".join(
            form_fields.get('title_crossref').split()
        )
        if title == title_crossref:
            data['titles'][0]["source"] = "CrossRef"
        else:
            data['titles'].append({
                'title': title_crossref,
                'source': "CrossRef"
            })

    try:
        data['titles'][0]['source']
    except KeyError:
        # Title has no source, so should be the submitter
        data['titles'][0]['source'] = "submitter"

    # ============================
    # Conference name
    # ============================
    # The conference name (and any nonpublic note) becomes a hidden note,
    # and the record joins the ConferencePaper collection.
    if 'conf_name' in form_fields:
        if 'nonpublic_note' in form_fields:
            data.setdefault("hidden_notes", []).append({
                "value": form_fields['conf_name']
            })
            data['hidden_notes'].append({
                'value': form_fields['nonpublic_note']
            })
        else:
            data.setdefault("hidden_notes", []).append({
                "value": form_fields['conf_name']
            })
        data['collections'].extend([{'primary': "ConferencePaper"}])

    # ============================
    # Page number
    # ============================
    # Derive the page count from the start/end pages if not already present.
    if 'page_nr' not in data:
        first_publication_info = data.get('publication_info', [{}])[0]
        page_start = first_publication_info.get('page_start')
        page_end = first_publication_info.get('page_end')
        if page_start and page_end:
            try:
                data['page_nr'] = int(page_end) - int(page_start) + 1
            except (TypeError, ValueError):
                # Non-numeric page markers: leave page_nr unset.
                pass

    # ============================
    # Language
    # ============================
    # 'oth' means the real language is in the free-text companion field.
    if data.get("languages", []) and data["languages"][0] == "oth":
        if form_fields.get("other_language"):
            data["languages"] = [form_fields["other_language"]]

    # ==========
    # Owner Info
    # ==========
    # TODO Make sure we are getting the email correctly
    userid = obj.id_user
    try:
        email = User.query.get(userid).email
    except AttributeError:
        email = ''
    try:
        # TODO Make sure we are getting the ORCID id correctly
        source = UserIdentity.query.filter_by(id_user=userid,
                                              method='orcid').one()
    except NoResultFound:
        source = ''
    if source:
        # e.g. "orcid:<identifier>"
        source = source.method + ':' + source.id
    data['acquisition_source'] = dict(
        source=source,
        email=email,
        date=date.today().isoformat(),
        method="submission",
        submission_number=str(obj.id),
    )

    # ==============
    # References
    # ==============
    # References are kept aside for curators, not stored on the record.
    if form_fields.get('references'):
        obj.extra_data["submission_data"]['references'] = form_fields.get('references')

    # ==============
    # Extra comments
    # ==============
    if form_fields.get('extra_comments'):
        data.setdefault('hidden_notes', []).append(
            {
                'value': form_fields['extra_comments'],
                'source': 'submitter'
            }
        )
        obj.extra_data["submission_data"]["extra_comments"] = form_fields.get("extra_comments")

    # ======================================
    # Journal name Knowledge Base conversion
    # ======================================
    if data.get("publication_info", [{}])[0].get("journal_title"):
        # journals_kb = dict([(x['key'].lower(), x['value'])
        #                     for x in get_kb_mappings(current_app.config.get("DEPOSIT_INSPIRE_JOURNALS_KB"))])
        # data['publication_info']['journal_title'] = journals_kb.get(data['publication_info']['journal_title'].lower(),
        #                                                             data['publication_info']['journal_title'])
        # TODO convert using journal records
        pass

    # The PDF location is workflow metadata, not record data.
    if 'pdf' in data:
        obj.extra_data["submission_data"]["pdf"] = data.pop("pdf")

    # Finally, return the converted data
    return data
def formdata_to_model(obj, formdata):
    """Manipulate form data to match the literature data model.

    Converts the submitted form into a HEP record dict, derives collections,
    title sources, page count and acquisition info, and stashes
    references / extra comments / PDF under ``obj.extra_data['submission_data']``.

    :param obj: workflow object (``obj.extra_data`` is mutated;
        ``obj.id_user`` / ``obj.id`` identify the submitter).
    :param formdata: raw form data (not mutated; a deep copy is used).
    :returns: dict shaped like the literature (HEP) data model.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(form_fields,
                          ['authors', 'supervisors', 'report_numbers'])
    obj.extra_data["submission_data"] = {}
    data = literature.do(form_fields)

    # Add extra fields that need to be computed or depend on other
    # fields.
    #
    # ======
    # Schema
    # ======
    # Expand a bare schema name into a full schema URL.
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for('invenio_jsonschemas.get_schema',
                                  schema_path="records/{0}".format(
                                      data['$schema']))

    # ============================
    # Collection
    # ============================
    data['collections'] = [{'primary': "HEP"}]
    if form_fields['type_of_doc'] == 'thesis':
        data['collections'].append({'primary': "THESIS"})
    if "field_categories" in data:
        # Check if it was imported from arXiv
        if any([x["scheme"] == "arXiv" for x in data["field_categories"]]):
            data['collections'].extend([{
                'primary': "arXiv"
            }, {
                'primary': "Citeable"
            }])
            # Add arXiv as source
            if data.get("abstracts"):
                data['abstracts'][0]['source'] = 'arXiv'
            if form_fields.get("arxiv_id"):
                data['external_system_numbers'] = [{
                    'value': 'oai:arXiv.org:' + form_fields['arxiv_id'],
                    'institute': 'arXiv'
                }]
    if "publication_info" in data:
        # NOTE(review): assumes publication_info is a non-empty list here;
        # an empty list would raise IndexError — confirm upstream guarantees.
        if all([
            key in data['publication_info'][0].keys()
            for key in ('year', 'journal_issue', 'journal_volume',
                        'page_start', 'page_end', 'artid')
        ]):
            # NOTE: Only peer reviewed journals should have this collection
            # we are adding it here but ideally should be manually added
            # by a curator.
            data['collections'].append({'primary': "Published"})
        # Add Citeable collection if not present
        collections = [x['primary'] for x in data['collections']]
        if "Citeable" not in collections:
            data['collections'].append({'primary': "Citeable"})

    # ============================
    # Title source and cleanup
    # ============================
    try:
        # Clean up all extra spaces in title
        data['titles'][0]['title'] = " ".join(
            data['titles'][0]['title'].split())
        title = data['titles'][0]['title']
    except (KeyError, IndexError):
        title = ""

    # If the arXiv/CrossRef title matches the main one, it only fixes the
    # source; otherwise it is recorded as an extra title.
    if form_fields.get('title_arXiv'):
        title_arxiv = " ".join(form_fields.get('title_arXiv').split())
        if title == title_arxiv:
            data['titles'][0]["source"] = "arXiv"
        else:
            data['titles'].append({'title': title_arxiv, 'source': "arXiv"})

    if form_fields.get('title_crossref'):
        title_crossref = " ".join(form_fields.get('title_crossref').split())
        if title == title_crossref:
            data['titles'][0]["source"] = "CrossRef"
        else:
            data['titles'].append({
                'title': title_crossref,
                'source': "CrossRef"
            })

    try:
        data['titles'][0]['source']
    except KeyError:
        # Title has no source, so should be the submitter
        data['titles'][0]['source'] = "submitter"

    # ============================
    # Conference name
    # ============================
    # The conference name (and any nonpublic note) becomes a hidden note,
    # and the record joins the ConferencePaper collection.
    if 'conf_name' in form_fields:
        if 'nonpublic_note' in form_fields:
            data.setdefault("hidden_notes",
                            []).append({"value": form_fields['conf_name']})
            data['hidden_notes'].append(
                {'value': form_fields['nonpublic_note']})
        else:
            data.setdefault("hidden_notes",
                            []).append({"value": form_fields['conf_name']})
        data['collections'].extend([{'primary': "ConferencePaper"}])

    # ============================
    # Page number
    # ============================
    # Derive the page count from the start/end pages if not already present.
    if 'page_nr' not in data:
        first_publication_info = data.get('publication_info', [{}])[0]
        page_start = first_publication_info.get('page_start')
        page_end = first_publication_info.get('page_end')
        if page_start and page_end:
            try:
                data['page_nr'] = int(page_end) - int(page_start) + 1
            except (TypeError, ValueError):
                # Non-numeric page markers: leave page_nr unset.
                pass

    # ============================
    # Language
    # ============================
    # 'oth' means the real language is in the free-text companion field.
    if data.get("languages", []) and data["languages"][0] == "oth":
        if form_fields.get("other_language"):
            data["languages"] = [form_fields["other_language"]]

    # ==========
    # Owner Info
    # ==========
    # TODO Make sure we are getting the email correctly
    userid = obj.id_user
    try:
        email = User.query.get(userid).email
    except AttributeError:
        email = ''
    try:
        # TODO Make sure we are getting the ORCID id correctly
        source = UserIdentity.query.filter_by(id_user=userid,
                                              method='orcid').one()
    except NoResultFound:
        source = ''
    if source:
        # e.g. "orcid:<identifier>"
        source = source.method + ':' + source.id
    data['acquisition_source'] = dict(
        source=source,
        email=email,
        date=date.today().isoformat(),
        method="submission",
        submission_number=str(obj.id),
    )

    # ==============
    # References
    # ==============
    # References are kept aside for curators, not stored on the record.
    if form_fields.get('references'):
        obj.extra_data["submission_data"]['references'] = form_fields.get(
            'references')

    # ==============
    # Extra comments
    # ==============
    if form_fields.get('extra_comments'):
        data.setdefault('hidden_notes', []).append({
            'value': form_fields['extra_comments'],
            'source': 'submitter'
        })
        obj.extra_data["submission_data"]["extra_comments"] = form_fields.get(
            "extra_comments")

    # ======================================
    # Journal name Knowledge Base conversion
    # ======================================
    if data.get("publication_info", [{}])[0].get("journal_title"):
        # journals_kb = dict([(x['key'].lower(), x['value'])
        #                     for x in get_kb_mappings(current_app.config.get("DEPOSIT_INSPIRE_JOURNALS_KB"))])
        # data['publication_info']['journal_title'] = journals_kb.get(data['publication_info']['journal_title'].lower(),
        #                                                             data['publication_info']['journal_title'])
        # TODO convert using journal records
        pass

    # The PDF location is workflow metadata, not record data.
    if 'pdf' in data:
        obj.extra_data["submission_data"]["pdf"] = data.pop("pdf")

    # Finally, return the converted data
    return data
def formdata_to_model(obj, formdata):
    """Manipulate form data to match the authors data model.

    :param obj: workflow object; ``obj.data`` may carry an existing
        ``$schema``, and ``obj.id_user`` / ``obj.id`` identify the submitter.
    :param formdata: raw form data (not mutated; a deep copy is used).
    :returns: stripped dict shaped like the authors data model.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields,
        ['institution_history', 'advisors', 'websites', 'experiments']
    )
    data = updateform.do(form_fields)

    # ===========
    # Collections
    # ===========
    data['_collections'] = ['Authors']

    # ======
    # Schema
    # ======
    # FIXME it's not clear whether $schema is ever present at this stage
    if '$schema' not in data and '$schema' in obj.data:
        data['$schema'] = obj.data.get('$schema')

    if '$schema' in data:
        ensure_valid_schema(data)

    # Build the "Family, Given" preferred name from the form fields.
    author_name = ''
    if 'family_name' in form_fields and form_fields['family_name']:
        author_name = form_fields['family_name'].strip() + ', '
    if 'given_names' in form_fields and form_fields['given_names']:
        author_name += form_fields['given_names']

    if author_name:
        # FIX: was ``data.get('name', {})['value'] = ...``, which wrote into a
        # throwaway dict (silently losing the name) whenever 'name' was
        # missing; setdefault mutates the dict actually stored in ``data``.
        data.setdefault('name', {})['value'] = author_name

    # Add comments to extra data
    if 'extra_comments' in form_fields and form_fields['extra_comments']:
        data.setdefault('_private_notes', []).append({
            'source': 'submitter',
            'value': form_fields['extra_comments']
        })

    # Submitted records are full records, not stubs.
    data['stub'] = False

    # ==========
    # Submitter Info
    # ==========
    try:
        user_email = User.query.get(obj.id_user).email
    except AttributeError:
        # No matching user record: fall back to an empty email.
        user_email = ''
    try:
        orcid = UserIdentity.query.filter_by(
            id_user=obj.id_user,
            method='orcid'
        ).one().id
    except NoResultFound:
        # Submitter has no linked ORCID identity.
        orcid = ''

    data['acquisition_source'] = dict(
        email=user_email,
        datetime=datetime.datetime.utcnow().isoformat(),
        method="submitter",
        orcid=orcid,
        submission_number=str(obj.id),
        internal_uid=int(obj.id_user),
    )

    data = strip_empty_values(data)
    return data
def formdata_to_model(obj, formdata):
    """Manipulate form data to match the authors data model.

    :param obj: workflow object; ``obj.data`` may carry an existing
        ``$schema``, and ``obj.id_user`` / ``obj.id`` identify the submitter.
    :param formdata: raw form data (not mutated; a deep copy is used).
    :returns: stripped, validated dict shaped like the authors data model.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(
        form_fields, ['institution_history', 'advisors', 'websites', 'experiments'])
    data = updateform.do(form_fields)

    # ===========
    # Collections
    # ===========
    data['_collections'] = ['Authors']

    # ======
    # Schema
    # ======
    # Inherit the schema from the workflow object when the form did not set one.
    if '$schema' not in data and '$schema' in obj.data:
        data['$schema'] = obj.data.get('$schema')

    # Expand a bare schema name into a full schema URL.
    if '$schema' in data and not data['$schema'].startswith('http'):
        data['$schema'] = url_for('invenio_jsonschemas.get_schema',
                                  schema_path="records/{0}".format(
                                      data['$schema']))

    # Build the "Family, Given" preferred name from the form fields.
    author_name = ''
    if 'family_name' in form_fields and form_fields['family_name']:
        author_name = form_fields['family_name'].strip() + ', '
    if 'given_names' in form_fields and form_fields['given_names']:
        author_name += form_fields['given_names']

    if author_name:
        # FIX: was ``data.get('name', {})['value'] = ...``, which wrote into a
        # throwaway dict (silently losing the name) whenever 'name' was
        # missing; setdefault mutates the dict actually stored in ``data``.
        data.setdefault('name', {})['value'] = author_name

    # Add comments to extra data
    if 'extra_comments' in form_fields and form_fields['extra_comments']:
        data.setdefault('_private_notes', []).append({
            'source': 'submitter',
            'value': form_fields['extra_comments']
        })

    # Submitted records are full records, not stubs.
    data['stub'] = False

    # ==========
    # Submitter Info
    # ==========
    try:
        user_email = User.query.get(obj.id_user).email
    except AttributeError:
        # No matching user record: fall back to an empty email.
        user_email = ''
    try:
        orcid = UserIdentity.query.filter_by(id_user=obj.id_user,
                                             method='orcid').one().id
    except NoResultFound:
        # Submitter has no linked ORCID identity.
        orcid = ''

    data['acquisition_source'] = dict(
        email=user_email,
        datetime=datetime.datetime.utcnow().isoformat(),
        method="submitter",
        orcid=orcid,
        submission_number=str(obj.id),
        internal_uid=int(obj.id_user),
    )

    data = strip_empty_values(data)
    validate(data, 'authors')
    return data
def formdata_to_model(obj, formdata):
    """Manipulate form data to match the literature data model.

    Converts the submitted form into a HEP record dict, derives collections,
    title sources, page count, acquisition info, and normalizes the journal
    title against the journals search index.

    :param obj: workflow object (``obj.id_user`` / ``obj.id`` identify the
        submitter).
    :param formdata: raw form data (not mutated; a deep copy is used).
    :returns: dict shaped like the literature (HEP) data model.
    """
    form_fields = copy.deepcopy(formdata)
    filter_empty_elements(form_fields,
                          ['authors', 'supervisors', 'report_numbers'])
    data = literature.do(form_fields)

    # Add extra fields that need to be computed or depend on other
    # fields.
    #
    # ======
    # Schema
    # ======
    # Expand a bare schema name into a full schema URL via the
    # invenio-jsonschemas extension.
    if '$schema' in data and not data['$schema'].startswith('http'):
        jsonschemas_ext = current_app.extensions.get('invenio-jsonschemas')
        data['$schema'] = jsonschemas_ext.path_to_url("records/{0}".format(
            data['$schema']))

    # ============================
    # Collection
    # ============================
    data['collections'] = [{'primary': "HEP"}]
    if form_fields['type_of_doc'] == 'thesis':
        data['collections'].append({'primary': "THESIS"})
    if get_value(form_fields, "arxiv_eprints.categories", None):
        # Check if it was imported from arXiv
        data['collections'].extend([{
            'primary': "arXiv"
        }, {
            'primary': "Citeable"
        }])
        # Add arXiv as source
        if data.get("abstracts"):
            data['abstracts'][0]['source'] = 'arXiv'
        if form_fields.get("arxiv_id"):
            data['external_system_numbers'] = [{
                'value': 'oai:arXiv.org:' + form_fields['arxiv_id'],
                'institute': 'arXiv'
            }]
    if "publication_info" in data:
        # NOTE(review): assumes publication_info is a non-empty list here;
        # an empty list would raise IndexError — confirm upstream guarantees.
        pub_keys = data['publication_info'][0].keys()
        has_pub_info = all([
            key in pub_keys
            for key in ('year', 'journal_issue', 'journal_volume')
        ])
        has_page_or_artid = any(
            [key in pub_keys for key in ('page_start', 'page_end', 'artid')])
        if has_pub_info and has_page_or_artid:
            # NOTE: Only peer reviewed journals should have this collection
            # we are adding it here but ideally should be manually added
            # by a curator.
            data['collections'].append({'primary': "Published"})
        # Add Citeable collection if not present
        collections = [x['primary'] for x in data['collections']]
        if "Citeable" not in collections:
            data['collections'].append({'primary': "Citeable"})

    # ============================
    # Title source and cleanup
    # ============================
    try:
        # Clean up all extra spaces in title
        data['titles'][0]['title'] = " ".join(
            data['titles'][0]['title'].split())
        title = data['titles'][0]['title']
    except (KeyError, IndexError):
        title = ""

    # If the arXiv/CrossRef title matches the main one, it only fixes the
    # source; otherwise it is recorded as an extra title.
    if form_fields.get('title_arXiv'):
        title_arxiv = " ".join(form_fields.get('title_arXiv').split())
        if title == title_arxiv:
            data['titles'][0]["source"] = "arXiv"
        else:
            data['titles'].append({'title': title_arxiv, 'source': "arXiv"})

    if form_fields.get('title_crossref'):
        title_crossref = " ".join(form_fields.get('title_crossref').split())
        if title == title_crossref:
            data['titles'][0]["source"] = "CrossRef"
        else:
            data['titles'].append({
                'title': title_crossref,
                'source': "CrossRef"
            })

    try:
        data['titles'][0]['source']
    except KeyError:
        # Title has no source, so should be the submitter
        data['titles'][0]['source'] = "submitter"

    # ============================
    # Conference name
    # ============================
    # The conference name (and any nonpublic note) becomes a hidden note,
    # and the record joins the ConferencePaper collection.
    if 'conf_name' in form_fields:
        if 'nonpublic_note' in form_fields:
            data.setdefault("hidden_notes",
                            []).append({"value": form_fields['conf_name']})
            data['hidden_notes'].append(
                {'value': form_fields['nonpublic_note']})
        else:
            data.setdefault("hidden_notes",
                            []).append({"value": form_fields['conf_name']})
        data['collections'].extend([{'primary': "ConferencePaper"}])

    # ============================
    # Page number
    # ============================
    # Derive the page count from the start/end pages if not already present.
    if 'page_nr' not in data:
        first_publication_info = data.get('publication_info', [{}])[0]
        page_start = first_publication_info.get('page_start')
        page_end = first_publication_info.get('page_end')
        if page_start and page_end:
            try:
                data['page_nr'] = int(page_end) - int(page_start) + 1
            except (TypeError, ValueError):
                # Non-numeric page markers: leave page_nr unset.
                pass

    # ============================
    # Language
    # ============================
    # 'oth' means the real language is in the free-text companion field.
    if form_fields.get('language') == 'oth':
        if form_fields.get("other_language"):
            data["languages"] = [form_fields["other_language"]]

    # ==========
    # Owner Info
    # ==========
    # TODO Make sure we are getting the email correctly
    userid = obj.id_user
    try:
        email = User.query.get(userid).email
    except AttributeError:
        email = ''
    try:
        # TODO Make sure we are getting the ORCID id correctly
        source = UserIdentity.query.filter_by(id_user=userid,
                                              method='orcid').one()
    except NoResultFound:
        source = ''
    if source:
        # e.g. "orcid:<identifier>"
        source = source.method + ':' + source.id
    data['acquisition_source'] = dict(
        source=source,
        email=email,
        date=date.today().isoformat(),
        method="submission",
        submission_number=str(obj.id),
    )

    # ==============
    # Extra comments
    # ==============
    if form_fields.get('extra_comments'):
        data.setdefault('hidden_notes', []).append({
            'value': form_fields['extra_comments'],
            'source': 'submitter'
        })

    # ==========================
    # Journal name normalization
    # ==========================
    # Replace the free-text journal title with the canonical short title
    # from the journals index, when a match exists.
    journal_title = get_value(data, 'publication_info[0].journal_title')
    if journal_title:
        hits = JournalsSearch().query(
            'match',
            title_variants__title__lowercased=journal_title).execute()
        if hits:
            try:
                short_title = hits[0].short_titles[0].title
                data['publication_info'][0]['journal_title'] = short_title
            except (AttributeError, IndexError):
                # Hit without usable short titles: keep the submitted title.
                pass

    # Finally, return the converted data
    return data