def update_references(recid, overwrite=True):
    """Update references for a record.

    First, we extract references from a record. Then, we are not updating
    the record directly but adding a bibupload task in -c mode which takes
    care of updating the record.

    Parameters:
    * recid: the id of the record
    """
    if not overwrite:
        # Refuse to overwrite records that already carry references.
        record = get_record(recid)
        if record and record_has_field(record, '999'):
            raise RecordHasReferences('Record has references and overwrite '
                                      'mode is disabled: %s' % recid)

    # Curated records are never touched, regardless of overwrite mode.
    if get_fieldvalues(recid, '999C59'):
        raise RecordHasReferences('Record has been curated: %s' % recid)

    # Parse references
    references_xml = extract_references_from_record_xml(recid)

    # Save new record to file
    temp_fd, temp_path = mkstemp(prefix=cfg.get("CFG_REFEXTRACT_FILENAME"),
                                 dir=cfg.get("CFG_TMPSHAREDDIR"))
    with os.fdopen(temp_fd, 'w') as temp_file:
        temp_file.write(references_xml)

    # Update record via a correction-mode bibupload task.
    task_low_level_submission('bibupload', 'refextract', '-P', '4',
                              '-c', temp_path)
def __init__(self, username=None, password=None, url=None, prefix=None,
             test_mode=None, api_ver="2"):
    """Initialize API client.

    Compatibility layer on top of external DataCite API client.
    """
    warnings.warn(
        "Use of invenio.utils.datacite:DataCite is "
        "deprecated in favor of "
        "http://datacite.readthedocs.org/en/latest/.",
        RemovedInInvenio22Warning)
    # Fall back to the Invenio configuration for any unset parameter.
    if test_mode is None:
        test_mode = cfg.get('CFG_DATACITE_TESTMODE', False)
    super(DataCite, self).__init__(
        username=username or cfg.get('CFG_DATACITE_USERNAME', ''),
        password=password or cfg.get('CFG_DATACITE_PASSWORD', ''),
        url=url or cfg.get('CFG_DATACITE_URL', 'https://mds.datacite.org/'),
        prefix=prefix or cfg.get('CFG_DATACITE_DOI_PREFIX', '10.5072'),
        test_mode=test_mode,
        api_ver=api_ver or "2")
def index(p, so, page):
    """Index page with uploader and list of existing depositions."""
    ctx = mycommunities_ctx()
    if not so:
        so = cfg.get('COMMUNITIES_DEFAULT_SORTING_OPTION')
    communities = Community.filter_communities(p, so)
    featured_community = FeaturedCommunity.get_current()
    form = SearchForm(p=p)
    per_page = cfg.get('COMMUNITIES_DISPLAYED_PER_PAGE', 10)
    page = max(page, 1)
    # Note: the Pagination object shadows the search pattern ``p`` in the
    # original; use a distinct name here for clarity.
    pag = Pagination(page, per_page, communities.count())
    start = per_page * (page - 1)
    stop = per_page * page
    ctx.update({
        'r_from': max(pag.per_page * (pag.page - 1), 0),
        'r_to': min(pag.per_page * pag.page, pag.total_count),
        'r_total': pag.total_count,
        'pagination': pag,
        'form': form,
        'title': _('Community Collections'),
        'communities': communities.slice(start, stop).all(),
        'featured_community': featured_community,
        'format_record': format_record,
    })
    return render_template("communities/index.html", **ctx)
def get_canonical_and_alternates_urls(url, drop_ln=True, washed_argd=None,
                                      quote_path=False):
    """Return (canonical URL, {language: alternate URL}) for an Invenio URL.

    The canonical URL is the original URL rebased onto CFG_SITE_URL with the
    ln= argument stripped; each alternate URL carries ln=<language code>.

    @param quote_path: if True, the path section of the given C{url}
        is quoted according to RFC 2396
    """
    dummy_scheme, dummy_netloc, path, dummy_params, query, fragment = urlparse(
        url)
    canonical_scheme, canonical_netloc = urlparse(cfg.get('CFG_SITE_URL'))[0:2]
    parsed_query = washed_argd or parse_qsl(query)
    no_ln_parsed_query = [(key, value) for (key, value) in parsed_query
                          if key != 'ln']
    if quote_path:
        path = urllib.quote(path)

    def _build_url(query_pairs):
        # Assemble a URL on the canonical scheme/netloc with the given query.
        return urlunparse((canonical_scheme, canonical_netloc, path,
                           dummy_params, urlencode(query_pairs), fragment))

    canonical_url = _build_url(no_ln_parsed_query if drop_ln else parsed_query)
    alternate_urls = {}
    for ln in cfg.get('CFG_SITE_LANGS'):
        alternate_urls[ln] = _build_url(no_ln_parsed_query + [('ln', ln)])
    return canonical_url, alternate_urls
def is_user_owner_of_record(user_info, recid):
    """Check if the user is owner of the record.

    I.e. he is the submitter and/or belongs to a owner-like group authorized
    to 'see' the record.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    :return: True if the user is 'owner' of the record; False otherwise
    """
    from invenio.modules.access.local_config import \
        CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS, \
        CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_USERIDS_IN_TAGS
    # ``recid`` may already be a record mapping; only look it up when it is
    # an identifier.
    if not isinstance(recid, MutableMapping):
        record = get_record(int(recid))
    else:
        record = recid
    # BUGFIX: the original read *both* tag lists from the USERIDS config key
    # and used the EMAILS default for the uid tags. Mirror the (correct)
    # viewer-rights variant: uid tags come from the USERIDS key/default,
    # email tags from the EMAILS key/default.
    uid_tags = cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_USERIDS_IN_TAGS',
                       CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_USERIDS_IN_TAGS)
    email_tags = cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS',
                         CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS)
    return is_user_in_tags(record, user_info, uid_tags, email_tags)
def send_account_activation_email(user):
    """Send an account activation email."""
    expires_in = cfg.get('CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS')
    token = EmailConfirmationSerializer(
        expires_in=timedelta(days=expires_in).total_seconds()
    ).create_token(user.id, {'email': user.email})

    activation_link = url_for(
        'webaccount.access',
        mailcookie=token,
        _external=True,
        _scheme='https',
    )
    # Template context for the activation email body.
    ctx = {
        "ip_address": None,
        "user": user,
        "email": user.email,
        "activation_link": activation_link,
        "days": expires_in,
    }

    # Pick the localized site name, falling back to the default one.
    sitename = cfg["CFG_SITE_NAME_INTL"].get(
        getattr(g, 'ln', cfg['CFG_SITE_LANG']), cfg['CFG_SITE_NAME'])
    send_email(
        cfg.get('CFG_SITE_SUPPORT_EMAIL'),
        user.email,
        _("Account registration at %(sitename)s", sitename=sitename),
        render_template("accounts/emails/activation.tpl", **ctx)
    )
def is_user_viewer_of_record(user_info, recid):
    """Check if the user is allow to view the record based in the marc tags.

    Checks inside CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS, i.e. his
    email is inside the 506__m tag or he is inside an e-group listed in the
    506__m tag.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    @return: True if the user is 'allow to view' the record; False otherwise
    @rtype: bool
    """
    from invenio.modules.access.local_config import \
        CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS, \
        CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS
    # The configuration may override the compiled-in tag lists.
    email_tags = cfg.get('CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS',
                         CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS)
    uid_tags = cfg.get('CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS',
                       CFG_ACC_GRANT_VIEWER_RIGHTS_TO_USERIDS_IN_TAGS)
    return is_user_in_tags(recid, user_info, uid_tags, email_tags)
def nato_context():
    """Build the cloud-init context that installs and runs the etcd updater."""
    updater_path = '/usr/local/bin/etcd-updater.sh'
    updater_script = b64encode(
        render_template_to_string('analyze/etcd-updater.sh',
                                  etcd_url=cfg.get('CFG_ANALYZE_ETCD_URL'),
                                  ttl=600,
                                  root=cfg.get('CFG_ANALYZE_NODES_KEY')))
    cron_entry = b64encode(
        render_template_to_string('analyze/etcd_updater_cron',
                                  context_script_path=updater_path))

    def _b64_file(content, path):
        # Shape of a single cloud-init ``write_files`` entry.
        return {
            'encoding': 'b64',
            'content': content,
            'permissions': '755',
            'path': path,
        }

    context = {
        'write_files': [
            _b64_file(updater_script, updater_path),
            _b64_file(cron_entry, '/etc/cron.d/etcd_updater'),
        ],
        # run it as soon as the VM is booted
        'runcmd': [
            [updater_path],
        ],
    }
    public_key = cfg.get('CFG_ANALYZE_PUBLIC_KEY')
    if public_key:
        context['ssh_authorized_keys'] = [public_key]
    return context
def get_canonical_and_alternates_urls(url, drop_ln=True, washed_argd=None,
                                      quote_path=False):
    """Return the canonical URL and its per-language alternates.

    The first element of the returned tuple is the original URL rebased on
    CFG_SITE_URL with the ln= argument removed; the second maps each site
    language code to an alternate URL with ln=<code> appended.

    @param quote_path: if True, the path section of the given C{url}
        is quoted according to RFC 2396
    """
    dummy_scheme, dummy_netloc, path, dummy_params, query, fragment = \
        urlparse(url)
    canonical_scheme, canonical_netloc = urlparse(cfg.get('CFG_SITE_URL'))[0:2]
    parsed_query = washed_argd or parse_qsl(query)
    # Query with any ln= argument removed.
    no_ln_parsed_query = [pair for pair in parsed_query if pair[0] != 'ln']
    canonical_parsed_query = no_ln_parsed_query if drop_ln else parsed_query
    if quote_path:
        path = urllib.quote(path)
    canonical_url = urlunparse((canonical_scheme, canonical_netloc, path,
                                dummy_params,
                                urlencode(canonical_parsed_query), fragment))
    alternate_urls = {}
    for ln in cfg.get('CFG_SITE_LANGS'):
        alternate_query = urlencode(no_ln_parsed_query + [('ln', ln)])
        alternate_urls[ln] = urlunparse((canonical_scheme, canonical_netloc,
                                         path, dummy_params, alternate_query,
                                         fragment))
    return canonical_url, alternate_urls
def run(self, *args, **kwargs):
    """Refresh CRLs and renew the robot VOMS proxy.

    Runs ``fetch-crl``, then generates a fresh proxy with
    ``voms-proxy-init`` into a temporary file and copies it to
    CFG_LWDAAP_ROBOT_PROXY with owner-only permissions. Retries the task
    when proxy generation fails.
    """
    logger.info("Fetching CRLs")
    proc = subprocess.Popen(['fetch-crl'], shell=False)
    proc.wait()
    logger.info("Renewing proxy")
    # NOTE(review): 'rw' is not a standard open-mode string ('r+' is the
    # usual read/write spelling) -- confirm this behaves as intended on the
    # target Python version.
    with NamedTemporaryFile(mode='rw') as new_proxy:
        cmd = ['voms-proxy-init',
               '--out', new_proxy.name,
               '-rfc']
        vo = cfg.get('CFG_DELEGATION_VO')
        if vo:
            cmd.extend(['--voms', vo])
        proc = subprocess.Popen(cmd, shell=False,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        logger.debug("CMD %s", ' '.join(cmd))
        # Capture combined stdout/stderr for logging and error reporting.
        out = ''.join([l for l in proc.stdout])
        logger.debug("OUTPUT: %s", out)
        proc.wait()
        if proc.returncode != 0:  # and not _check_proxy_validity(new_proxy):
            msg = ("Proxy generation failed (%d): %s" % (proc.returncode,
                                                         out))
            logger.error(msg)
            raise self.retry(Exception(msg))
        # dump new proxy to proper location
        # XXX should this file be locked somehow?
        # NOTE(review): voms-proxy-init wrote to new_proxy.name externally;
        # this read() assumes the handle's position still yields the full
        # content -- verify.
        with open(cfg.get('CFG_LWDAAP_ROBOT_PROXY'), 'w+') as f:
            f.write(new_proxy.read())
            f.flush()
        # Proxy must be readable/writable by the owner only.
        os.chmod(cfg.get('CFG_LWDAAP_ROBOT_PROXY'),
                 stat.S_IRUSR | stat.S_IWUSR)
def index(p, so, page):
    """
    Index page with uploader and list of existing depositions
    """
    ctx = mycommunities_ctx()
    if not so:
        so = cfg.get('COMMUNITIES_DEFAULT_SORTING_OPTION')
    communities = Community.filter_communities(p, so)
    form = SearchForm()
    per_page = cfg.get('COMMUNITIES_DISPLAYED_PER_PAGE', 10)
    page = max(page, 1)
    offset = per_page * (page - 1)
    ctx.update({
        'pagination': Pagination(page, per_page, communities.count()),
        'form': form,
        'title': _('Community Collections'),
        'communities': communities.slice(offset, offset + per_page).all(),
    })
    return render_template("communities/index.html", **ctx)
def submit_rt_ticket(obj, queue, subject, body, requestors, ticket_id_key):
    """Submit ticket to RT with the given parameters."""
    from inspire.utils.tickets import get_instance

    # Trick to prepare ticket body
    body = "\n ".join(line.strip() for line in body.split("\n"))

    # Only talk to RT in production; otherwise just log and skip.
    rt_instance = get_instance() if cfg.get("PRODUCTION_MODE") else None
    rt_queue = cfg.get("CFG_BIBCATALOG_QUEUES") or queue
    recid = obj.extra_data.get("recid", "") or obj.data.get("recid", "")

    if not rt_instance:
        obj.log.error("No RT instance available. Skipping!")
        obj.log.info("Ticket submission ignored.")
    else:
        ticket_id = rt_instance.create_ticket(
            Queue=rt_queue,
            Subject=subject,
            Text=body,
            Requestors=requestors,
            CF_RecordID=recid
        )
        obj.extra_data[ticket_id_key] = ticket_id
        obj.log.info("Ticket {0} created:\n{1}".format(
            ticket_id, body.encode("utf-8", "ignore")))
    return True
def send_account_activation_email(user):
    """Send an account activation email."""
    from invenio.modules.access.mailcookie import \
        mail_cookie_create_mail_activation

    expires_in = cfg.get('CFG_WEBSESSION_ADDRESS_ACTIVATION_EXPIRE_IN_DAYS')
    cookie = mail_cookie_create_mail_activation(
        user.email,
        cookie_timeout=timedelta(days=expires_in)
    )
    link = url_for(
        'webaccount.access',
        mailcookie=cookie,
        _external=True,
        _scheme='https',
    )

    # Render context.
    ctx = {
        "ip_address": None,
        "user": user,
        "email": user.email,
        "activation_link": link,
        "days": expires_in,
    }
    subject = _("Account registration at %(sitename)s",
                sitename=cfg['CFG_SITE_NAME'])

    # Send email
    send_email(
        cfg.get('CFG_SITE_SUPPORT_EMAIL'),
        user.email,
        subject,
        render_template("accounts/emails/activation.tpl", **ctx)
    )
def index(p, so, page):
    """Index page with uploader and list of existing depositions."""
    ctx = mycommunities_ctx()
    if not so:
        so = cfg.get("COMMUNITIES_DEFAULT_SORTING_OPTION")
    communities = Community.filter_communities(p, so)
    featured_community = FeaturedCommunity.get_current()
    form = SearchForm(p=p)
    per_page = cfg.get("COMMUNITIES_DISPLAYED_PER_PAGE", 10)
    page = max(page, 1)
    pagination = Pagination(page, per_page, communities.count())
    # Row window shown on the page, clamped to the available total.
    window_start = pagination.per_page * (pagination.page - 1)
    window_end = pagination.per_page * pagination.page
    ctx.update(
        {
            "r_from": max(window_start, 0),
            "r_to": min(window_end, pagination.total_count),
            "r_total": pagination.total_count,
            "pagination": pagination,
            "form": form,
            "title": _("Community Collections"),
            "communities": communities.slice(
                per_page * (page - 1), per_page * page
            ).all(),
            "featured_community": featured_community,
            "format_record": format_record,
        }
    )
    return render_template("communities/index.html", **ctx)
def run(self, *args, **kwargs):
    """Fetch fresh CRLs and renew the robot VOMS proxy file.

    Generates the proxy into a temporary file via ``voms-proxy-init`` and
    installs it at CFG_LWDAAP_ROBOT_PROXY with owner-only permissions;
    retries the task on generation failure.
    """
    logger.info("Fetching CRLs")
    proc = subprocess.Popen(['fetch-crl'], shell=False)
    proc.wait()
    logger.info("Renewing proxy")
    # NOTE(review): mode 'rw' is not a standard open mode ('r+' is the
    # usual spelling) -- confirm behaviour on the target Python version.
    with NamedTemporaryFile(mode='rw') as new_proxy:
        cmd = ['voms-proxy-init',
               '--out', new_proxy.name,
               '-rfc'
               ]
        vo = cfg.get('CFG_DELEGATION_VO')
        if vo:
            cmd.extend(['--voms', vo])
        proc = subprocess.Popen(cmd, shell=False,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        logger.debug("CMD %s", ' '.join(cmd))
        # Combined stdout/stderr, kept for the failure message below.
        out = ''.join([l for l in proc.stdout])
        logger.debug("OUTPUT: %s", out)
        proc.wait()
        if proc.returncode != 0:  # and not _check_proxy_validity(new_proxy):
            msg = ("Proxy generation failed (%d): %s" % (proc.returncode,
                                                         out))
            logger.error(msg)
            raise self.retry(Exception(msg))
        # dump new proxy to proper location
        # XXX should this file be locked somehow?
        # NOTE(review): reads the temp handle after an external process
        # wrote the file -- verify the full content is returned.
        with open(cfg.get('CFG_LWDAAP_ROBOT_PROXY'), 'w+') as f:
            f.write(new_proxy.read())
            f.flush()
        # Restrict the installed proxy to owner read/write.
        os.chmod(cfg.get('CFG_LWDAAP_ROBOT_PROXY'),
                 stat.S_IRUSR | stat.S_IWUSR)
def filter_step(obj, eng):
    """Run an external python script.

    Exports the object's record as MARCXML to a per-engine directory and
    invokes the filter script configured in the repository arguments
    (``f_filter-file``) on it, logging the script's output.

    :param obj: workflow object providing ``data``/``extra_data``/``log``
    :param eng: workflow engine (its uuid names the working directory)
    """
    from invenio_records.api import Record
    from invenio.utils.shell import run_shell_command

    repository = obj.extra_data.get("repository", {})
    arguments = repository.get("arguments", {})
    script_name = arguments.get("f_filter-file")
    # Guard clause: nothing to do without a configured filter script.
    if not script_name:
        obj.log.error("No script file found!")
        return

    marcxml_value = Record(obj.data.dumps()).legacy_export_as_marc()
    extract_path = os.path.join(
        cfg.get('OAIHARVESTER_STORAGEDIR', cfg.get('CFG_TMPSHAREDDIR')),
        str(eng.uuid))
    if not os.path.exists(extract_path):
        os.makedirs(extract_path)

    # Now we launch BibUpload tasks for the final MARCXML files
    marcxmlfile = extract_path + os.sep + str(obj.id)
    # BUGFIX: use a context manager so the file handle is closed even when
    # the write raises (the original leaked the descriptor on failure).
    with open(marcxmlfile, 'w') as file_fd:
        file_fd.write(marcxml_value)

    exitcode, cmd_stdout, cmd_stderr = run_shell_command(
        cmd="%s '%s'",
        args=(str(script_name), str(marcxmlfile)))
    if exitcode != 0 or cmd_stderr != "":
        obj.log.error(
            "Error while running filtering script on %s\nError:%s"
            % (marcxmlfile, cmd_stderr))
    else:
        obj.log.info(cmd_stdout)
def __init__(self):
    """Initialize provider."""
    # Gather all DataCite credentials/settings from the configuration.
    datacite_settings = {
        'username': cfg.get('CFG_DATACITE_USERNAME'),
        'password': cfg.get('CFG_DATACITE_PASSWORD'),
        'prefix': cfg.get('CFG_DATACITE_DOI_PREFIX'),
        'test_mode': cfg.get('CFG_DATACITE_TESTMODE', False),
        'url': cfg.get('CFG_DATACITE_URL'),
    }
    self.api = DataCiteMDSClient(**datacite_settings)
def _plot_extract(obj, eng):
    """Extract plots from an arXiv LaTeX tarball and attach them.

    Downloads (or reuses) the source tarball, runs the LaTeX plot
    extractor and stores the resulting plots on the workflow object.

    :param obj: workflow object being processed
    :param eng: workflow engine processing the object
    """
    from invenio.utils.plotextractor.api import (
        get_tarball_from_arxiv,
        get_marcxml_plots_from_tarball
    )
    from invenio.modules.workflows.utils import convert_marcxml_to_bibfield
    from invenio.utils.shell import Timeout

    if "_result" not in obj.extra_data:
        obj.extra_data["_result"] = {}

    repository = obj.extra_data.get("repository", {})
    arguments = repository.get("arguments", {})

    chosen_type = plotextractor_types
    if not chosen_type:
        chosen_type = arguments.get('p_extraction-source', [])
    if not isinstance(chosen_type, list):
        chosen_type = [chosen_type]

    if 'latex' in chosen_type:
        # Run LaTeX plotextractor
        if "tarball" not in obj.extra_data["_result"]:
            extract_path = os.path.join(
                cfg.get('OAIHARVESTER_STORAGEDIR',
                        cfg.get('CFG_TMPSHAREDDIR')),
                str(eng.uuid)
            )
            tarball = get_tarball_from_arxiv(
                obj.data.get(cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
                extract_path
            )
            if tarball is None:
                obj.log.error("No tarball found")
                return
            obj.extra_data["_result"]["tarball"] = tarball
        else:
            tarball = obj.extra_data["_result"]["tarball"]

        # BUGFIX: ``marcxml`` must be defined even when extraction times
        # out; previously the ``if marcxml`` check below raised NameError
        # after a Timeout.
        marcxml = None
        try:
            marcxml = get_marcxml_plots_from_tarball(tarball)
        except Timeout:
            eng.log.error(
                'Timeout during tarball extraction on {0}'.format(tarball)
            )
        if marcxml:
            # We store the path to the directory the tarball contents lives
            new_dict = convert_marcxml_to_bibfield(marcxml)
            _attach_files_to_obj(obj, new_dict)
            obj.update_task_results(
                "Plots",
                [{
                    "name": "Plots",
                    "result": new_dict["fft"],
                    "template": "workflows/results/plots.html"
                }]
            )
def make_user_agent_string(component=None):
    """
    Return a nice and uniform user-agent string to be used when Invenio
    act as a client in HTTP requests.
    """
    parts = ["Invenio-%s (+%s; \"%s\")" % (cfg.get('CFG_VERSION'),
                                           cfg.get('CFG_SITE_URL'),
                                           cfg.get('CFG_SITE_NAME'))]
    if component:
        parts.append(component)
    # Base string plus optional component, space separated.
    return " ".join(parts)
def __init__(self):
    """Initialize provider."""
    # All connection parameters come from the Invenio configuration.
    self.api = DataCiteMDSClient(
        url=cfg.get('CFG_DATACITE_URL'),
        username=cfg.get('CFG_DATACITE_USERNAME'),
        password=cfg.get('CFG_DATACITE_PASSWORD'),
        prefix=cfg.get('CFG_DATACITE_DOI_PREFIX'),
        test_mode=cfg.get('CFG_DATACITE_TESTMODE', False),
    )
def __init__(self, username=None, password=None, url=None, prefix=None,
             test_mode=None, api_ver="2"):
    """
    Initialize DataCite API.

    In case parameters are not specified via keyword arguments,
    they will be read from the Invenio configuration.

    @param username: DataCite username (or CFG_DATACITE_USERNAME)
    @type username: str
    @param password: DataCite password (or CFG_DATACITE_PASSWORD)
    @type password: str
    @param url: DataCite API base URL (or CFG_DATACITE_URL). Defaults to
        https://mds.datacite.org/.
    @type url: str
    @param prefix: DOI prefix (or CFG_DATACITE_DOI_PREFIX). Defaults to
        10.5072 (DataCite test prefix).
    @type prefix: str
    @param test_mode: Set to True to enable test mode (or
        CFG_DATACITE_TESTMODE). Defaults to False.
    @type test_mode: boolean
    @param api_ver: DataCite API version. Currently has no effect.
        Default to 2.
    @type api_ver: str
    """
    if not HAS_SSL:
        warn("Module ssl not installed. Please install with e.g. "
             "'pip install ssl'. Required for HTTPS connections to "
             "DataCite.")

    self.username = username or cfg.get('CFG_DATACITE_USERNAME', '')
    self.password = password or cfg.get('CFG_DATACITE_PASSWORD', '')
    self.prefix = prefix or cfg.get('CFG_DATACITE_DOI_PREFIX', '10.5072')
    self.api_ver = api_ver  # Currently not used

    # Normalise the base URL so that it always ends with a slash.
    api_url = url or cfg.get('CFG_DATACITE_URL', 'https://mds.datacite.org/')
    if not api_url.endswith('/'):
        api_url = api_url + "/"
    self.api_url = api_url

    if test_mode is None:
        self.test_mode = cfg.get('CFG_DATACITE_TESTMODE', False)
    else:
        self.test_mode = test_mode

    # If in test mode, set prefix to 10.5072, the default DataCite test
    # prefix.
    if self.test_mode:
        self.prefix = "10.5072"
def get_vm_connection(client, vm_id):
    """Return connection details for a VM as an ``{error, msg}`` dict.

    Looks the VM up via ``get_vm``, then reads its connection parameters
    (ip/port/user/http, depending on the app environment) from etcd.
    ``msg`` carries either an HTML snippet with connection instructions or
    a plain error description.
    """
    vm = get_vm(client, vm_id)
    if not vm:
        return dict(
            error=True,
            msg='Instance is not known to the system'
        )
    if vm['status'] != 'ACTIVE':
        return dict(
            error=True,
            msg='Instance must be ACTIVE to get connected to it.'
        )
    # The etcd URL's netloc may or may not carry an explicit port.
    u = urlsplit(cfg.get('CFG_ANALYZE_ETCD_URL'))
    netloc = u[1].split(':')
    if len(netloc) > 1:
        etcd_client = etcd.Client(host=netloc[0], port=int(netloc[1]))
    else:
        etcd_client = etcd.Client(host=netloc[0])
    vm_dir = '/'.join([cfg.get('CFG_ANALYZE_MAPPINGS_KEY', '/'), vm_id])
    try:
        r = etcd_client.read(vm_dir, recursive=True)
        # Flatten the etcd subtree into {leaf key: value}.
        d = {c.key.split('/')[-1]: c.value for c in r.children}
        app_env = vm.get('app_env')
        if app_env == 'ssh':
            d['user'] = '******'
            return dict(
                error=False,
                msg=('<p>You can connect via SSH to %(ip)s, '
                     'port %(port)s with '
                     'user "%(user)s":</p>'
                     '<p>ssh -i <your ssh key> -p %(port)s '
                     '%(user)s@%(ip)s</p>') % d
            )
        elif app_env in ['jupyter-python', 'jupyter-r']:
            return dict(
                error=False,
                msg=('<p>You can connect to <a href="%(http)s" '
                     'class="btn btn-info">jupyter</a>.') % d
            )
        else:
            return dict(
                error=True,
                msg='Unknown application environment "%s".' % app_env
            )
    except etcd.EtcdKeyNotFound:
        # The updater on the VM has not published its details yet.
        return dict(
            error=True,
            msg='Connection details are still not available.'
        )
    except etcd.EtcdException as e:
        return dict(
            error=True,
            msg='Unable to get connection details (%s).' % e
        )
def get_tarball_for_model(eng, arxiv_id):
    """We download it."""
    # Per-engine working directory under the harvester storage area.
    storage_root = cfg.get('OAIHARVESTER_STORAGEDIR',
                           cfg.get('CFG_TMPSHAREDDIR'))
    extract_path = os.path.join(storage_root, str(eng.uuid))
    return get_tarball_from_arxiv(arxiv_id, extract_path)
def arxiv_fulltext_download(obj, eng):
    """Perform the fulltext download step for arXiv records.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.utils.plotextractor.api import get_pdf_from_arxiv

    # BUGFIX: the guard tested "result" while this function (and every
    # sibling harvesting task) uses the "_result" key, so the cache dict
    # was recreated even when it already existed.
    if "_result" not in obj.extra_data:
        obj.extra_data["_result"] = {}

    if "pdf" not in obj.extra_data["_result"]:
        extract_path = os.path.join(
            cfg.get('OAIHARVESTER_STORAGEDIR', cfg.get('CFG_TMPSHAREDDIR')),
            str(eng.uuid))
        pdf = get_pdf_from_arxiv(
            obj.data.get(cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
            extract_path)

        # Resolve the document type from the repository arguments, falling
        # back to 'arXiv' when not configured.
        arguments = obj.extra_data["repository"]["arguments"]
        try:
            if not arguments['t_doctype'] == '':
                doctype = arguments['t_doctype']
            else:
                doctype = 'arXiv'
        except KeyError:
            eng.log.error("WARNING: HASARDOUS BEHAVIOUR EXPECTED, "
                          "You didn't specified t_doctype in argument"
                          " for fulltext_download,"
                          "try to recover by using the default one!")
            doctype = 'arXiv'

        if pdf:
            obj.extra_data["_result"]["pdf"] = pdf
            new_dict_representation = {
                "fft": [{
                    "url": pdf,
                    "docfile_type": doctype
                }]
            }
            _attach_files_to_obj(obj, new_dict_representation)
            fileinfo = {
                "type": "fulltext",
                "filename": os.path.basename(pdf),
                "full_path": pdf,
            }
            obj.update_task_results(
                "PDF",
                [{
                    "name": "PDF",
                    "result": fileinfo,
                    "template": "workflows/results/fft.html"
                }])
        else:
            obj.log.info("No PDF found.")
    else:
        eng.log.info("There was already a pdf register for this record,"
                     "perhaps a duplicate task in you workflow.")
def register():
    """Handle the user self-registration form (GET shows it, POST submits).

    Renders ``accounts/register.html`` with a title, a list of status
    messages and a bootstrap-style ``state`` reflecting the outcome.
    """
    req = request.get_legacy_request()

    # FIXME
    # Site-wide access control: registration is closed above level 0.
    if cfg.get('CFG_ACCESS_CONTROL_LEVEL_SITE') > 0:
        return webuser.page_not_authorized(req, "../youraccount/register?ln=%s" % g.ln,
                                           navmenuid='youraccount')

    form = RegisterForm(request.values, csrf_enabled=False)
    #uid = current_user.get_id()

    title = _("Register")
    messages = []
    state = ""

    if form.validate_on_submit():
        # registerUser returns 0 on success, or an error code otherwise.
        ruid = webuser.registerUser(req, form.email.data.encode('utf8'),
                                    form.password.data.encode('utf8'),
                                    form.nickname.data.encode('utf8'),
                                    ln=g.ln)
        if ruid == 0:
            title = _("Account created")
            messages.append(_("Your account has been successfully created."))
            state = "success"
            if cfg.get('CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT') == 1:
                messages.append(_("In order to confirm its validity, an email message containing an account activation key has been sent to the given email address."))
                messages.append(_("Please follow instructions presented there in order to complete the account registration process."))
            if cfg.get('CFG_ACCESS_CONTROL_LEVEL_ACCOUNTS') >= 1:
                messages.append(_("A second email will be sent when the account has been activated and can be used."))
            elif cfg.get('CFG_ACCESS_CONTROL_NOTIFY_USER_ABOUT_NEW_ACCOUNT') != 1:
                # No activation step required: log the new user straight in.
                user = User.query.filter(User.email == form.email.data.lower()).one()
                login_user(user.get_id())
                messages.append(_("You can now access your account."))
        else:
            title = _("Registration failure")
            state = "danger"
            if ruid == 5:
                messages.append(_("Users cannot register themselves, only admin can register them."))
            elif ruid == 6 or ruid == 1:
                # Note, code 1 is used both for invalid email, and email
                # sending problems, however the email address is validated
                # by the form, so we only have to report a problem sending
                # the email here
                messages.append(_("The site is having troubles in sending you an email for confirming your email address."))
                messages.append(_("The error has been logged and will be taken in consideration as soon as possible."))
            else:
                # Errors [-2, (1), 2, 3, 4] taken care of by form validation
                messages.append(_("Internal error %(ruid)s", ruid=ruid))
    elif request.method == 'POST':
        # POST with invalid form data: the field errors are shown inline.
        title = _("Registration failure")
        state = "warning"

    return render_template('accounts/register.html', form=form, title=title,
                           messages=messages, state=state)
def get_storage_path(suffix=""):
    """Return a path ready to store files."""
    from invenio.base.globals import cfg
    path = os.path.join(cfg.get("CFG_PREFIX"),
                        cfg.get("HARVESTER_STORAGE_PREFIX"),
                        suffix)
    # Create the directory lazily on first use.
    if not os.path.exists(path):
        os.makedirs(path)
    return path
def _plot_extract(obj, eng):
    """Extract plots from an arXiv tarball and attach them to the object.

    :param obj: workflow object being processed
    :param eng: workflow engine processing the object
    """
    from invenio.utils.plotextractor.api import (
        get_tarball_from_arxiv,
        get_marcxml_plots_from_tarball)
    from invenio.modules.workflows.utils import convert_marcxml_to_bibfield
    from invenio.utils.shell import Timeout

    if "_result" not in obj.extra_data:
        obj.extra_data["_result"] = {}

    repository = obj.extra_data.get("repository", {})
    arguments = repository.get("arguments", {})

    chosen_type = plotextractor_types
    if not chosen_type:
        chosen_type = arguments.get('p_extraction-source', [])
    if not isinstance(chosen_type, list):
        chosen_type = [chosen_type]

    if 'latex' in chosen_type:
        # Run LaTeX plotextractor
        if "tarball" not in obj.extra_data["_result"]:
            extract_path = os.path.join(
                cfg.get('OAIHARVESTER_STORAGEDIR',
                        cfg.get('CFG_TMPSHAREDDIR')),
                str(eng.uuid))
            tarball = get_tarball_from_arxiv(
                obj.data.get(
                    cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
                extract_path)
            if tarball is None:
                obj.log.error("No tarball found")
                return
            obj.extra_data["_result"]["tarball"] = tarball
        else:
            tarball = obj.extra_data["_result"]["tarball"]

        # BUGFIX: initialize ``marcxml`` so the check below does not raise
        # NameError when the extraction times out.
        marcxml = None
        try:
            marcxml = get_marcxml_plots_from_tarball(tarball)
        except Timeout:
            eng.log.error(
                'Timeout during tarball extraction on {0}'.format(tarball))
        if marcxml:
            # We store the path to the directory the tarball contents lives
            new_dict = convert_marcxml_to_bibfield(marcxml)
            _attach_files_to_obj(obj, new_dict)
            obj.update_task_results(
                "Plots",
                [{
                    "name": "Plots",
                    "result": new_dict["fft"],
                    "template": "workflows/results/plots.html"
                }])
def arxiv_refextract(obj, eng):
    """Perform the reference extraction step.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.legacy.refextract.api import extract_references_from_file_xml
    from invenio.utils.plotextractor.api import get_pdf_from_arxiv
    from invenio.modules.workflows.utils import convert_marcxml_to_bibfield

    if "_result" not in obj.extra_data:
        obj.extra_data["_result"] = {}

    # Reuse a PDF downloaded by an earlier task when available.
    try:
        pdf = obj.extra_data["_result"]["pdf"]
    except KeyError:
        pdf = None

    if not pdf:
        extract_path = os.path.join(
            cfg.get('OAIHARVESTER_STORAGEDIR', cfg.get('CFG_TMPSHAREDDIR')),
            str(eng.uuid)
        )
        pdf = get_pdf_from_arxiv(
            obj.data.get(cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
            extract_path
        )
        obj.extra_data["_result"]["pdf"] = pdf

    if pdf and os.path.isfile(pdf):
        references_xml = extract_references_from_file_xml(
            obj.extra_data["_result"]["pdf"]
        )
        if references_xml:
            # Wrap the extracted MARCXML snippet into a complete collection
            # document before conversion.
            updated_xml = '<?xml version="1.0" encoding="UTF-8"?>\n' \
                          '<collection>\n' + references_xml + \
                          "\n</collection>"

            new_dict_representation = convert_marcxml_to_bibfield(updated_xml)
            if "reference" in new_dict_representation:
                obj.data["reference"] = new_dict_representation["reference"]
                obj.log.info("Extracted {0} references".format(len(obj.data["reference"])))
                obj.update_task_results(
                    "References",
                    [{"name": "References",
                      "result": new_dict_representation['reference'],
                      "template": "workflows/results/refextract.html"}]
                )
            return
        else:
            obj.log.info("No references extracted")
    else:
        obj.log.error("Not able to download and process the PDF")
def arxiv_refextract(obj, eng):
    """Perform the reference extraction step.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.legacy.refextract.api import extract_references_from_file_xml
    from invenio.utils.plotextractor.api import get_pdf_from_arxiv
    from invenio.modules.workflows.utils import convert_marcxml_to_bibfield

    if "_result" not in obj.extra_data:
        obj.extra_data["_result"] = {}

    # Reuse a previously downloaded PDF when one is cached on the object.
    try:
        pdf = obj.extra_data["_result"]["pdf"]
    except KeyError:
        pdf = None

    if not pdf:
        extract_path = os.path.join(
            cfg.get('OAIHARVESTER_STORAGEDIR', cfg.get('CFG_TMPSHAREDDIR')),
            str(eng.uuid))
        pdf = get_pdf_from_arxiv(
            obj.data.get(cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
            extract_path)
        obj.extra_data["_result"]["pdf"] = pdf

    if pdf and os.path.isfile(pdf):
        references_xml = extract_references_from_file_xml(
            obj.extra_data["_result"]["pdf"])
        if references_xml:
            # Embed the extracted references in a full MARCXML collection.
            updated_xml = '<?xml version="1.0" encoding="UTF-8"?>\n' \
                          '<collection>\n' + references_xml + \
                          "\n</collection>"

            new_dict_representation = convert_marcxml_to_bibfield(updated_xml)
            if "reference" in new_dict_representation:
                obj.data["reference"] = new_dict_representation["reference"]
                obj.log.info("Extracted {0} references".format(
                    len(obj.data["reference"])))
                obj.update_task_results(
                    "References",
                    [{
                        "name": "References",
                        "result": new_dict_representation['reference'],
                        "template": "workflows/results/refextract.html"
                    }])
            return
        else:
            obj.log.info("No references extracted")
    else:
        obj.log.error("Not able to download and process the PDF")
def _arxiv_fulltext_download(obj, eng):
    """Perform the fulltext download step for arXiv records.

    :param obj: Bibworkflow Object to process
    :param eng: BibWorkflowEngine processing the object
    """
    from invenio.utils.plotextractor.api import get_pdf_from_arxiv

    # BUGFIX: the guard tested "result" while the code below (and every
    # sibling task) uses the "_result" key.
    if "_result" not in obj.extra_data:
        obj.extra_data["_result"] = {}
    if "pdf" not in obj.extra_data["_result"]:
        extract_path = os.path.join(
            cfg.get('OAIHARVESTER_STORAGEDIR', cfg.get('CFG_TMPSHAREDDIR')),
            str(eng.uuid)
        )
        pdf = get_pdf_from_arxiv(
            obj.data.get(cfg.get('OAIHARVESTER_RECORD_ARXIV_ID_LOOKUP')),
            extract_path
        )
        if pdf:
            obj.extra_data["_result"]["pdf"] = pdf
            # BUGFIX: ``doctype`` was referenced but never defined here
            # (NameError at runtime); use the same default the other
            # fulltext-download task falls back to.
            doctype = 'arXiv'
            new_dict_representation = {
                "fft": [
                    {
                        "url": pdf,
                        "docfile_type": doctype
                    }
                ]
            }
            _attach_files_to_obj(obj, new_dict_representation)
            fileinfo = {
                "type": "fulltext",
                "filename": os.path.basename(pdf),
                "full_path": pdf,
            }
            obj.update_task_results(
                os.path.basename(pdf),
                [{
                    "name": "PDF",
                    "result": fileinfo,
                    "template": "workflows/results/files.html"
                }]
            )
        else:
            obj.log.info("No PDF found.")
    else:
        eng.log.info("There was already a pdf register for this record,"
                     "perhaps a duplicate task in you workflow.")
def get_instance():
    """Make a RT instance and return it."""
    url = cfg.get("CFG_BIBCATALOG_SYSTEM_RT_URL", "")
    # Without a configured RT URL there is nothing to connect to.
    if not url:
        return None
    tracker = rt.Rt(
        url=url,
        default_login=cfg.get("CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_USER", ""),
        default_password=cfg.get("CFG_BIBCATALOG_SYSTEM_RT_DEFAULT_PWD", ""),
    )
    tracker.login()
    return tracker
def get_connection_for_dump_on_slave():
    """Return a slave connection for performing dbdump operation on a slave.

    Reads the superuser credentials from the configuration, lazily loading
    the password from the database password file, and returns an
    autocommitting MySQL connection to the slave host.
    """
    su_user = cfg.get("CFG_DATABASE_SLAVE_SU_USER", "")
    if "CFG_DATABASE_SLAVE_SU_PASS" not in cfg:
        cfg["CFG_DATABASE_SLAVE_SU_PASS"] = \
            _get_password_from_database_password_file(su_user)
    # BUGFIX: the original called int(cfg.get("CFG_DATABASE_PORT"), 3306),
    # which passes 3306 as the *base* argument of int() and fails at
    # runtime; 3306 is clearly meant to be the default port.
    connection = connect(host=cfg.get("CFG_DATABASE_SLAVE", ""),
                         port=int(cfg.get("CFG_DATABASE_PORT", 3306)),
                         db=cfg.get("CFG_DATABASE_NAME", ""),
                         user=su_user,
                         passwd=cfg.get("CFG_DATABASE_SLAVE_SU_PASS", ""),
                         use_unicode=False, charset='utf8')
    connection.autocommit(True)
    return connection
def load(module='', prefix=''):
    """
    Load and returns a template class, given a module name (like
    'websearch', 'webbasket',...). The module corresponding to
    the currently selected template model (see invenio.conf,
    variable CFG_WEBSTYLE_TEMPLATE_SKIN) is tried first. In case it does
    not exist, it returns the default template for that module.
    """
    local = {}
    # load the right template based on the CFG_WEBSTYLE_TEMPLATE_SKIN and
    # the specified module
    if CFG_WEBSTYLE_TEMPLATE_SKIN == "default":
        # Default skin: try the flat module name first, then the legacy
        # package layout.
        try:
            mymodule = __import__("invenio.%s_%stemplates" % (module, prefix),
                                  local, local,
                                  ["invenio.templates.%s" % (module)])
        except ImportError:
            mymodule = __import__("invenio.legacy.%s.%stemplates"
                                  % (module, prefix), local, local,
                                  ["invenio.templates.%s" % (module)])
    else:
        # Custom skin: prefer the skin-specific template module, falling
        # back to the plain one when the skin does not provide it.
        try:
            mymodule = __import__("invenio.%s_templates_%s"
                                  % (module, CFG_WEBSTYLE_TEMPLATE_SKIN),
                                  local, local,
                                  ["invenio.templates.%s_%s"
                                   % (module, CFG_WEBSTYLE_TEMPLATE_SKIN)])
        except ImportError:
            mymodule = __import__("invenio.%s_templates" % (module),
                                  local, local,
                                  ["invenio.templates.%s" % (module)])
    # Optional developer tooling: wrap every tmpl_* method for inspection.
    if 'inspect-templates' in cfg.get('CFG_DEVEL_TOOLS', []):
        for method_name in dir(mymodule.Template):
            if method_name.startswith('tmpl_'):
                enhance_method(module, mymodule.Template, method_name,
                               method_wrapper)
    return mymodule.Template()
def get_current_user_records_that_can_be_displayed(qid):
    """Return records that current user can display.

    :param qid: query identifier
    :return: records in intbitset
    """
    CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT = cfg.get(
        'CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT')

    # The inner function is memoized; `uid` is unused in the body, so it is
    # presumably there only to partition the cache per user — verify against
    # the memoize implementation before refactoring.
    @search_results_cache.memoize(timeout=CFG_WEBSEARCH_SEARCH_CACHE_TIMEOUT)
    def get_records_for_user(qid, uid):
        from invenio.legacy.search_engine import \
            get_records_that_can_be_displayed
        # Cached raw result set for this query id; empty when expired/absent.
        key = get_search_results_cache_key_from_qid(qid)
        data = search_results_cache.get(key)
        if data is None:
            return intbitset([])
        # Companion cache entry holding the collection the query ran against.
        cc = search_results_cache.get(key + '::cc')
        return get_records_that_can_be_displayed(current_user,
                                                 intbitset().fastload(data),
                                                 cc)
    # Simplifies API
    return get_records_for_user(qid, current_user.get_id())
def generator(self):
    """Resolve and return the document-name generator callable.

    The configuration may store either a callable or a dotted import
    path; a string value is resolved via ``import_string``.
    """
    name_func = cfg.get('RECORD_DOCUMENT_NAME_GENERATOR',
                        default_name_generator)
    if isinstance(name_func, six.string_types):
        return import_string(name_func)
    return name_func
def record_extraction_from_string(
        xml_string, oai_namespace="http://www.openarchives.org/OAI/2.0/"):
    """Given a OAI-PMH XML return a list of every record incl. headers.

    Each record is re-wrapped in its own <OAI-PMH> envelope together with
    the response headers (responseDate, request) of the original response.

    :param xml_string: OAI-PMH XML
    :type xml_string: str
    :param oai_namespace: optionally provide the OAI-PMH namespace
    :type oai_namespace: str

    :return: list of serialized XML records
    :rtype: list of str
    """
    if oai_namespace:
        nsmap = {None: oai_namespace}
    else:
        # No namespace given: fall back to the configured default map.
        nsmap = cfg.get("OAIHARVESTER_DEFAULT_NAMESPACE_MAP")
    # Clark notation, e.g. "{http://www.openarchives.org/OAI/2.0/}".
    namespace_prefix = "{{{0}}}".format(oai_namespace)
    root = etree.fromstring(xml_string)
    headers = []
    headers.extend(
        root.findall(".//{0}responseDate".format(namespace_prefix), nsmap))
    headers.extend(
        root.findall(".//{0}request".format(namespace_prefix), nsmap))
    records = root.findall(".//{0}record".format(namespace_prefix), nsmap)
    list_of_records = []
    for record in records:
        wrapper = etree.Element("OAI-PMH", nsmap=nsmap)
        # NOTE(review): with lxml, append() *moves* a node to its new
        # parent, so this mutates `root` and re-uses the shared header
        # elements across iterations; it works because each wrapper is
        # serialized before the next iteration re-appends the headers —
        # confirm callers do not rely on `root` afterwards.
        for header in headers:
            wrapper.append(header)
        wrapper.append(record)
        list_of_records.append(etree.tostring(wrapper))
    return list_of_records
def filter_out_based_on_date_range(recids, fromdate="", untildate="",
                                   set_spec=None):
    """Restrict *recids* to records modified within the given date range.

    :param recids: candidate record ids (cloned internally, not mutated)
    :param fromdate: lower bound as an OAI date string; defaults to the
        repository's earliest datestamp
    :param untildate: upper bound as an OAI date string; defaults to the
        repository's latest datestamp
    :param set_spec: OAI set spec; when the set was updated after
        `fromdate`, the lower bound is widened back to the earliest
        datestamp so newly added set members are not missed
    :return: intbitset of record ids within the range
    """
    if fromdate:
        fromdate = normalize_date(fromdate, "T00:00:00Z")
    else:
        fromdate = get_earliest_datestamp()
    # Datestamps are converted from UTC to local time, presumably because
    # bibrec.modification_date is stored in local time — verify.
    fromdate = utc_to_localtime(fromdate)
    if untildate:
        untildate = normalize_date(untildate, "T23:59:59Z")
    else:
        untildate = get_latest_datestamp()
    untildate = utc_to_localtime(untildate)
    if set_spec is not None:  ## either it has a value or it is empty, thus meaning all records
        last_updated = get_set_last_update(set_spec)
        if last_updated is not None:
            last_updated = utc_to_localtime(last_updated)
            # Set changed after the requested lower bound: widen the range.
            if last_updated > fromdate:
                fromdate = utc_to_localtime(get_earliest_datestamp())
    recids = intbitset(recids)  ## clone so the caller's set is untouched
    if fromdate and untildate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date BETWEEN %s AND %s", (fromdate, untildate)))
    elif fromdate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date >= %s", (fromdate, )))
    elif untildate:
        recids &= intbitset(run_sql("SELECT id FROM bibrec WHERE modification_date <= %s", (untildate, )))
    # Optionally hide restricted records from OAI output (default: on).
    if cfg.get('CFG_OAI_FILTER_RESTRICTED_RECORDS', True):
        recids = recids - get_all_restricted_recids()
    return recids
def index(p, so, page):
    """Render the current user's instruments with search and pagination.

    :param p: search pattern forwarded to the instrument queries
    :param so: sorting option (accepted for URL compatibility; unused here)
    :param page: 1-based page number (values below 1 are clamped to 1)
    :return: rendered ``instruments/index.html`` template
    """
    page = max(page, 1)
    per_page = cfg.get('INSTRUMENTS_DISPLAYED_PER_PAGE', 9)
    instruments = getPaginatedInstrumentsByIdUser(current_user['id'], p,
                                                  page, per_page)
    count = getCountInstrumentsByIdUser(current_user['id'], p)
    instruments_json = json.loads(instruments)
    form = SearchForm()
    # Idiom fix: the original built the list with `[None] * 0` plus an
    # append loop; a comprehension does the same in one step.
    my_array = [Instrument.from_json(instrument)
                for instrument in instruments_json]
    pagination = Pagination(page, per_page, count)
    ctx = dict(
        instruments=my_array,
        form=form,
        page=page,
        per_page=per_page,
        pagination=pagination,
    )
    return render_template("instruments/index.html", **ctx)
def extract_references_from_url_xml(url):
    """Extract references from the pdf specified in the url.

    The single parameter is the url of the pdf. It raises
    FullTextNotAvailable if fetching the full text fails with a 404.
    The result is given in marcxml.
    """
    file_request = requests.get(url)
    # BUG FIX: the original prefix was "%s" % (url.split('/')[-1:]) — a
    # one-element *list*, producing temp names like "['paper.pdf']xyz".
    # Use the last path segment itself. Also renamed: mkstemp returns
    # (fd, path), not (filename, filepath).
    (temp_fd, temp_path) = mkstemp(
        prefix="%s" % (url.split('/')[-1],),
        dir=cfg.get("CFG_TMPSHAREDDIR"),
    )
    os.write(temp_fd, file_request.content)
    os.close(temp_fd)
    try:
        try:
            marcxml = extract_references_from_file_xml(temp_path)
        except IOError as err:
            # NOTE(review): plain IOError has no `.code` attribute;
            # presumably the extractor raises an HTTPError-like subclass
            # carrying one — TODO confirm.
            if err.code == 404:
                raise FullTextNotAvailable()
            else:
                raise
    finally:
        # Always clean up the temporary download.
        os.remove(temp_path)
    return marcxml
def is_user_viewer_of_record(user_info, recid):
    """
    Check if the user is allow to view the record based in the marc tags
    inside CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS
    i.e. his email is inside the 506__m tag or he is inside an e-group listed
    in the 506__m tag

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    @return: True if the user is 'allow to view' the record; False otherwise
    @rtype: bool
    """
    allowed_entries = []
    for tag in cfg.get('CFG_ACC_GRANT_VIEWER_RIGHTS_TO_EMAILS_IN_TAGS', []):
        # Imported locally: only needed when viewer tags are configured.
        from invenio.legacy.bibrecord import get_fieldvalues
        allowed_entries.extend(get_fieldvalues(recid, tag))
    user_email = user_info['email'].strip().lower()
    for entry in allowed_entries:
        # Entry may name an e-group the user belongs to...
        if entry in user_info['group']:
            return True
        # ...or the user's own email address.
        if user_email == entry.strip().lower():
            return True
    return False
def is_user_owner_of_record(user_info, recid):
    """Check if the user is owner of the record.

    I.e. he is the submitter and/or belongs to a owner-like group authorized
    to 'see' the record.

    :param user_info: the user_info dictionary that describe the user.
    :type user_info: user_info dictionary
    :param recid: the record identifier.
    :type recid: positive integer
    :return: True if the user is 'owner' of the record; False otherwise
    """
    authorized_emails_or_group = []
    for tag in cfg.get('CFG_ACC_GRANT_AUTHOR_RIGHTS_TO_EMAILS_IN_TAGS', []):
        # Imported locally: only needed when author tags are configured.
        from invenio.legacy.bibrecord import get_fieldvalues
        authorized_emails_or_group.extend(get_fieldvalues(recid, tag))
    for email_or_group in authorized_emails_or_group:
        # Entry may name an e-group the user belongs to...
        if email_or_group in user_info['group']:
            return True
        # ...or the user's own email address.
        email = email_or_group.strip().lower()
        if user_info['email'].strip().lower() == email:
            return True
        # Robustness/consistency fix: use cfg.get(...) with a default like
        # every other config lookup in this module; cfg['CFG_CERN_SITE']
        # raised KeyError when the key was absent.
        if cfg.get('CFG_CERN_SITE', False):
            # the egroup might be in the form [email protected]
            if email_or_group.replace('@cern.ch', ' [CERN]') in \
                    user_info['group']:
                return True
    return False
def load(module='', prefix=''):
    """Load and return a template class instance for *module*.

    The template module matching the currently selected skin (see
    invenio.conf, variable CFG_WEBSTYLE_TEMPLATE_SKIN) is tried first; when
    it does not exist, the default template module for *module* is imported
    instead.

    :param module: invenio module name (like 'websearch', 'webbasket', ...)
    :param prefix: optional prefix for the template module name
    :return: an instance of the imported module's ``Template`` class
    """
    local = {}
    # load the right template based on CFG_WEBSTYLE_TEMPLATE_SKIN and the
    # specified module
    if CFG_WEBSTYLE_TEMPLATE_SKIN == "default":
        try:
            mymodule = __import__("invenio.%s_%stemplates" % (module, prefix),
                                  local, local,
                                  ["invenio.legacy.%s.templates" % (module)])
        except ImportError:
            mymodule = __import__("invenio.legacy.%s.%stemplates" % (module, prefix),
                                  local, local,
                                  ["invenio.legacy.%s.templates" % (module)])
    else:
        try:
            # BUG FIX: the fromlist formatted "invenio.legacy.%s.templates"
            # (one %s) with TWO arguments (module, skin), which raised
            # "TypeError: not all arguments converted" whenever a
            # non-default skin was configured. Use the single-argument form
            # like the other three branches.
            mymodule = __import__("invenio.legacy.%s.templates_%s" % (module, CFG_WEBSTYLE_TEMPLATE_SKIN),
                                  local, local,
                                  ["invenio.legacy.%s.templates" % (module,)])
        except ImportError:
            mymodule = __import__("invenio.legacy.%s.templates" % (module),
                                  local, local,
                                  ["invenio.legacy.%s.templates" % (module)])
    # Optionally wrap every tmpl_* method for the template-inspection
    # development tool.
    if 'inspect-templates' in cfg.get('CFG_DEVEL_TOOLS', []):
        for method_name in dir(mymodule.Template):
            if method_name.startswith('tmpl_'):
                enhance_method(module, mymodule.Template, method_name,
                               method_wrapper)
    return mymodule.Template()
def setup_app(): """Setup OAuth2 provider.""" # Initialize OAuth2 provider oauth2.init_app(current_app) # Configures the OAuth2 provider to use the SQLALchemy models for getters # and setters for user, client and tokens. bind_sqlalchemy(oauth2, db.session, client=Client) # Flask-OAuthlib does not support CACHE_REDIS_URL if cfg['OAUTH2_CACHE_TYPE'] == 'redis' and \ cfg.get('CACHE_REDIS_URL'): from redis import from_url as redis_from_url cfg.setdefault( 'OAUTHLIB_CACHE_REDIS_HOST', redis_from_url(cfg['CACHE_REDIS_URL']) ) # Configures an OAuth2Provider instance to use configured caching system # to get and set the grant token. bind_cache_grant(current_app, oauth2, OAuthUserProxy.get_current_user) # Disables oauthlib's secure transport detection in in debug mode. if current_app.debug or current_app.testing: os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
def default_name_generator(document):
    """Return default name of record document with storage path.

    The path is generated from the uuid using two folder levels: the first
    two characters name the first folder and the next two the second. The
    directories are created on demand; an existing *directory* is fine, but
    if either path component exists and is not a directory an OSError is
    raised.

    :param document: The document to be stored.
    :returns: Path based on the `_id` of the document.
    """
    uuid = document['_id']
    directory = os.path.join(cfg.get('CFG_BIBDOCFILE_FILEDIR'),
                             uuid[0:2], uuid[2:4])
    try:
        os.makedirs(directory)
    except OSError as e:
        # Swallow the error only for a pre-existing directory; anything
        # else (permissions, a file in the way) propagates.
        if not (e.errno == errno.EEXIST and os.path.isdir(directory)):
            raise
    return os.path.join(directory, uuid[4:])
def train(records, output):
    """Train a set of records from the command line.

    Usage: inveniomanage predicter train -r /path/to/json -o model.pickle

    :param records: path to a JSON file with the training records
    :param output: destination path for the trained model; a bare filename
        is resolved against CLASSIFIER_MODEL_PATH
    """
    if not records:
        print("Missing records!", file=sys.stderr)
        return
    if not os.path.isfile(records):
        print("{0} is not a file!".format(records), file=sys.stderr)
        return

    if os.path.basename(output) == output:
        # Only a relative name, prefix with config
        output = os.path.join(
            cfg.get("CLASSIFIER_MODEL_PATH", ""),
            output
        )

    # Make sure directories are created
    output_dir = os.path.dirname(output)
    if not os.path.exists(output_dir):
        # BUG FIX: the original called os.makedirs(output), creating a
        # *directory* at the output file path itself, so the model file
        # could never be written there. Create the parent directory instead.
        os.makedirs(output_dir)

    # Check that location is writable
    if not os.access(output_dir, os.W_OK):
        print("{0} is not writable file!".format(output), file=sys.stderr)
        return
    job = celery_train.delay(records, output)
    print("Scheduled job {0}".format(job.id))
def getServiceJsonParamenters():
    """Return the Lifewatch service parameters as a JSON-compatible object.

    The configured value is round-tripped through json.dumps/json.loads,
    yielding a deep copy composed purely of JSON types.
    """
    service_config = cfg.get('CFG_LFW_SERVICE')
    serialized = json.dumps(service_config)
    return json.loads(serialized)