def _filename_from_content_disposition(header):
    """Return the bare file name announced by a Content-Disposition header.

    Only the plain ``filename=`` parameter is considered; surrounding
    whitespace and one pair of matching quotes are removed.  Any directory
    part is dropped so that a remote server cannot choose where the file
    is written.  Returns ``''`` when no usable file name is present.

    NOTE: parameters are split on ``;``, so a quoted file name that itself
    contains a semicolon is truncated at that semicolon.
    """
    import os

    for param in header.split(';'):
        key, sep, value = param.strip().partition('=')
        if not sep or key.strip().lower() != 'filename':
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1]
        # Treat both kinds of separators as directories and keep the leaf.
        return os.path.basename(value.replace('\\', '/'))
    return ''


def download(url, localFileName=None):
    """Download a file from a remote url and store it in a local file.

    The name of the local file is chosen in this order:

    1. ``localFileName`` when it is given (non-empty);
    2. the file name found in the ``Content-Disposition`` response header;
    3. ``url2name`` of the final URL when the request was redirected;
    4. ``url2name`` of ``url``.

    :param url: address of the remote file.
    :param localFileName: optional name forcing where the file is saved.
    :return: None.  Errors raised while opening the URL or writing the
        file are not caught here and propagate to the caller.
    """
    localName = url2name(url)
    req = urllib2.Request(url)
    r = make_invenio_opener('BibEncode').open(req)
    try:
        headerName = ''
        if 'Content-Disposition' in r.info():
            headerName = _filename_from_content_disposition(
                r.info()['Content-Disposition'])
        if headerName:
            # The server told us how the file should be called.
            localName = headerName
        elif r.url != url:
            # We were redirected: take the name from the final URL.
            localName = url2name(r.url)
        if localFileName:
            # The caller can force the name the file is saved under.
            localName = localFileName
        with open(localName, 'wb') as f:
            shutil.copyfileobj(r, f)
    finally:
        # Release the connection even when writing the file fails.
        r.close()
def _filename_from_content_disposition(header):
    """Return the bare file name announced by a Content-Disposition header.

    Only the plain ``filename=`` parameter is considered; surrounding
    whitespace and one pair of matching quotes are removed.  Any directory
    part is dropped so that a remote server cannot choose where the file
    is written.  Returns ``""`` when no usable file name is present.

    NOTE: parameters are split on ``;``, so a quoted file name that itself
    contains a semicolon is truncated at that semicolon.
    """
    import os

    for param in header.split(";"):
        key, sep, value = param.strip().partition("=")
        if not sep or key.strip().lower() != "filename":
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
            value = value[1:-1]
        # Treat both kinds of separators as directories and keep the leaf.
        return os.path.basename(value.replace("\\", "/"))
    return ""


def download(url, localFileName=None):
    """Download a file from a remote url and store it in a local file.

    The name of the local file is chosen in this order:

    1. ``localFileName`` when it is given (non-empty);
    2. the file name found in the ``Content-Disposition`` response header;
    3. ``url2name`` of the final URL when the request was redirected;
    4. ``url2name`` of ``url``.

    :param url: address of the remote file.
    :param localFileName: optional name forcing where the file is saved.
    :return: None.  Errors raised while opening the URL or writing the
        file are not caught here and propagate to the caller.
    """
    localName = url2name(url)
    req = urllib2.Request(url)
    r = make_invenio_opener("BibEncode").open(req)
    try:
        headerName = ""
        if "Content-Disposition" in r.info():
            headerName = _filename_from_content_disposition(
                r.info()["Content-Disposition"])
        if headerName:
            # The server told us how the file should be called.
            localName = headerName
        elif r.url != url:
            # We were redirected: take the name from the final URL.
            localName = url2name(r.url)
        if localFileName:
            # The caller can force the name the file is saved under.
            localName = localFileName
        with open(localName, "wb") as f:
            shutil.copyfileobj(r, f)
    finally:
        # Release the connection even when writing the file fails.
        r.close()
""" from __future__ import print_function __revision__ = "$Id$" import pprint import sys import re import getopt from invenio.legacy.search_engine import perform_request_search from invenio.legacy.bibrecord import get_fieldvalues from invenio.config import CFG_CERN_SITE from invenio.utils.url import make_invenio_opener BIBFORMAT_OPENER = make_invenio_opener('BibFormat') if CFG_CERN_SITE: journal_name_tag = '773__p' else: journal_name_tag = '909C4p' issns = { 'aapps bull.': '0218-2203', 'account. manag. inf. technol.': '0959-8022', 'acm comput. surv.': '0360-0300', 'acm sigplan not.': '0362-1340', 'acm trans. comput. syst.': '0734-2071', 'acm trans. comput.-hum. interact.': '1073-0516', 'acm trans. database syst.': '0362-5915', 'acm trans. graph.': '0730-0301',
from invenio.modules.classifier.errors import TaxonomyError log = bconfig.get_logger("bibclassify.ontology_reader") from invenio import config from invenio.modules.classifier.registry import taxonomies # only if not running in a standalone mode if bconfig.STANDALONE: dbquery = None from urllib2 import urlopen else: from invenio.legacy import dbquery from invenio.utils.url import make_invenio_opener urlopen = make_invenio_opener('BibClassify').open _contains_digit = re.compile("\d") _starts_with_non = re.compile("(?i)^non[a-z]") _starts_with_anti = re.compile("(?i)^anti[a-z]") _split_by_punctuation = re.compile("(\W+)") _CACHE = {} def get_cache(taxonomy_id): """Return thread-safe cache for the given taxonomy id. :param taxonomy_id: identifier of the taxonomy :type taxonomy_id: str
import sys
import urllib
import urllib2
import datetime
from xml.dom.minidom import parse
from time import sleep

from invenio.config import CFG_ETCDIR, CFG_CROSSREF_USERNAME, \
    CFG_CROSSREF_PASSWORD, CFG_CROSSREF_EMAIL
from invenio.legacy.bibconvert.xslt_engine import convert
from invenio.legacy.bibrecord import record_get_field_value
from invenio.utils.url import make_invenio_opener
from invenio.utils.json import json, json_unicode_to_utf8

CROSSREF_OPENER = make_invenio_opener('crossrefutils')

FIELDS_JOURNAL = 'issn,title,author,volume,issue,page,year,type,doi'.split(',')
FIELDS_BOOK = ('isbn,ser_title,vol_title,author,volume,edition_number,' +
               'page,year,component_number,type,doi').split(',')


# Exceptions classes
class CrossrefError(Exception):
    """Crossref errors.

    The error code given at construction time is kept in ``self.code``
    and ``str()`` of the exception is the ``repr()`` of that code.
    """

    def __init__(self, code):
        """Store the error code.

        The code is also handed to ``Exception.__init__`` so that it ends
        up in ``self.args``; without this ``args`` stays empty and the
        code is lost in ``repr()`` and when the exception is pickled.
        """
        Exception.__init__(self, code)
        self.code = code

    def __str__(self):
        """Return the ``repr()`` of the error code."""
        return repr(self.code)
Raises InvenioFileDownloadError exception. """ import urllib2 import time import os import socket import urllib import tempfile import shutil import sys from invenio.utils.url import make_invenio_opener URL_OPENER = make_invenio_opener('filedownloadutils') from invenio.config import (CFG_TMPSHAREDDIR, CFG_BIBUPLOAD_FFT_ALLOWED_LOCAL_PATHS, CFG_WEBSUBMIT_STORAGEDIR) #: block size when performing I/O. CFG_FILEUTILS_BLOCK_SIZE = 1024 * 8 class InvenioFileDownloadError(Exception): """A generic download exception.""" def __init__(self, msg, code=None): Exception.__init__(self, msg) self.code = code
""" from __future__ import print_function __revision__ = "$Id$" import pprint import sys import re import getopt from invenio.legacy.search_engine import perform_request_search from invenio.legacy.bibrecord import get_fieldvalues from invenio.config import CFG_CERN_SITE from invenio.utils.url import make_invenio_opener BIBFORMAT_OPENER = make_invenio_opener('BibFormat') if CFG_CERN_SITE: journal_name_tag = '773__p' else: journal_name_tag = '909C4p' issns = { 'aapps bull.': '0218-2203', 'account. manag. inf. technol.': '0959-8022', 'acm comput. surv.': '0360-0300', 'acm sigplan not.': '0362-1340', 'acm trans. comput. syst.': '0734-2071', 'acm trans. comput.-hum. interact.': '1073-0516', 'acm trans. database syst.': '0362-5915', 'acm trans. graph.': '0730-0301', 'acm trans. inf. syst. secur.': '1094-9224',
""" __revision__ = "$Id$" import getopt import sys import time import re import ConfigParser from invenio.utils.url import make_invenio_opener from invenio.config import CFG_ETCDIR from invenio.legacy.dbquery import run_sql from invenio.modules.ranker.registry import configuration BIBRANK_OPENER = make_invenio_opener('BibRank') opts_dict = {} task_id = -1 def bibrankgkb(config): """Generates a .kb file based on input from the configuration file""" if opts_dict["verbose"] >= 1: write_message("Running: Generate Knowledgebase.") journals = {} journal_src = {} i = 0 #Reading the configuration file while config.has_option("bibrankgkb","create_%s" % i):
from invenio.modules.classifier.errors import TaxonomyError log = bconfig.get_logger("bibclassify.ontology_reader") from invenio import config from invenio.modules.classifier.registry import taxonomies # only if not running in a standalone mode if bconfig.STANDALONE: dbquery = None from urllib2 import urlopen else: from invenio.legacy import dbquery from invenio.utils.url import make_invenio_opener urlopen = make_invenio_opener("BibClassify").open _contains_digit = re.compile("\d") _starts_with_non = re.compile("(?i)^non[a-z]") _starts_with_anti = re.compile("(?i)^anti[a-z]") _split_by_punctuation = re.compile("(\W+)") _CACHE = {} def get_cache(taxonomy_id): """Return thread-safe cache for the given taxonomy id. :param taxonomy_id: identifier of the taxonomy :type taxonomy_id: str
import sys import urllib import urllib2 import datetime from xml.dom.minidom import parse from time import sleep from invenio.config import CFG_ETCDIR, CFG_CROSSREF_USERNAME, \ CFG_CROSSREF_PASSWORD, CFG_CROSSREF_EMAIL from invenio.legacy.bibconvert.xslt_engine import convert from invenio.legacy.bibrecord import record_get_field_value from invenio.utils.url import make_invenio_opener from invenio.utils.json import json, json_unicode_to_utf8 CROSSREF_OPENER = make_invenio_opener('crossrefutils') FIELDS_JOURNAL = 'issn,title,author,volume,issue,page,year,type,doi'.split(',') FIELDS_BOOK = ('isbn,ser_title,vol_title,author,volume,edition_number,' + 'page,year,component_number,type,doi').split(',') # Exceptions classes class CrossrefError(Exception): """Crossref errors""" def __init__(self, code): """Initialisation""" self.code = code def __str__(self): """Returns error code"""
from datetime import datetime, timedelta from flask import current_app from invenio_base.globals import cfg from invenio.utils.url import make_invenio_opener import rdflib from six import iteritems from six.moves import cPickle from .errors import TaxonomyError from .registry import taxonomies urlopen = make_invenio_opener('classifier').open _contains_digit = re.compile("\d") _starts_with_non = re.compile("(?i)^non[a-z]") _starts_with_anti = re.compile("(?i)^anti[a-z]") _split_by_punctuation = re.compile("(\W+)") _CACHE = {} def get_cache(taxonomy_id): """Return thread-safe cache for the given taxonomy id. :param taxonomy_id: identifier of the taxonomy :type taxonomy_id: str
from datetime import datetime, timedelta from flask import current_app from invenio.base.globals import cfg from invenio.utils.url import make_invenio_opener import rdflib from six import iteritems from six.moves import cPickle from .errors import TaxonomyError from .registry import taxonomies urlopen = make_invenio_opener('classifier').open _contains_digit = re.compile("\d") _starts_with_non = re.compile("(?i)^non[a-z]") _starts_with_anti = re.compile("(?i)^anti[a-z]") _split_by_punctuation = re.compile("(\W+)") _CACHE = {} def get_cache(taxonomy_id): """Return thread-safe cache for the given taxonomy id. :param taxonomy_id: identifier of the taxonomy :type taxonomy_id: str
""" __revision__ = "$Id$" import getopt import sys import time import re import ConfigParser from invenio.utils.url import make_invenio_opener from invenio.config import CFG_ETCDIR from invenio.legacy.dbquery import run_sql from invenio.modules.ranker.registry import configuration BIBRANK_OPENER = make_invenio_opener('BibRank') opts_dict = {} task_id = -1 def bibrankgkb(config): """Generates a .kb file based on input from the configuration file""" if opts_dict["verbose"] >= 1: write_message("Running: Generate Knowledgebase.") journals = {} journal_src = {} i = 0 #Reading the configuration file
determine if a local file is a PDF file. This module is STANDALONE safe """ import os import re from invenio.legacy.bibclassify import config as bconfig if bconfig.STANDALONE: from urllib2 import urlopen else: from invenio.utils.url import make_invenio_opener urlopen = make_invenio_opener('BibClassify').open log = bconfig.get_logger("bibclassify.text_extractor") _ONE_WORD = re.compile("[A-Za-z]{2,}") def is_pdf(document): """Check if a document is a PDF file and return True if it is.""" if not executable_exists('pdftotext'): log.warning("GNU file was not found on the system. " "Switching to a weak file extension test.") if document.lower().endswith(".pdf"): return True return False # Tested with file version >= 4.10. First test is secure and works
import urllib2, time, os, sys, re from invenio.config import CFG_TMPDIR, \ CFG_PLOTEXTRACTOR_SOURCE_BASE_URL, \ CFG_PLOTEXTRACTOR_SOURCE_TARBALL_FOLDER, \ CFG_PLOTEXTRACTOR_SOURCE_PDF_FOLDER, \ CFG_PLOTEXTRACTOR_DOWNLOAD_TIMEOUT from .config import CFG_PLOTEXTRACTOR_DESY_BASE, \ CFG_PLOTEXTRACTOR_DESY_PIECE from invenio.legacy.search_engine import get_record from invenio.legacy.bibrecord import record_get_field_instances, \ field_get_subfield_values from invenio.utils.shell import run_shell_command from .output_utils import write_message from invenio.utils.url import make_invenio_opener PLOTEXTRACTOR_OPENER = make_invenio_opener('plotextractor') PDF_EXTENSION = '.pdf' ARXIV_HEADER = 'arXiv:' HEP_EX = ['hep-ex/', 9405, ARXIV_HEADER + 'hep-ex_'] # experimental # a note about hep-ex: the hep-ex papers from 9403 and 9404 are stored # in arXiv's servers as hep-ph HEP_LAT = ['hep-lat/', 9107, ARXIV_HEADER + 'hep-lat_'] # lattice HEP_PH = ['hep-ph/', 9203, ARXIV_HEADER + 'hep-ph_'] # phenomenology HEP_TH = ['hep-th/', 9108, ARXIV_HEADER + 'hep-th_'] # theory HEP_AREAS = [HEP_EX, HEP_LAT, HEP_PH, HEP_TH] URL = 0 BEGIN_YEAR_MONTH_INDEX = 1
from datetime import datetime, timedelta from flask import current_app from invenio.base.globals import cfg from invenio.utils.url import make_invenio_opener import rdflib from six import iteritems from six.moves import cPickle from .errors import TaxonomyError from .registry import taxonomies urlopen = make_invenio_opener("classifier").open _contains_digit = re.compile("\d") _starts_with_non = re.compile("(?i)^non[a-z]") _starts_with_anti = re.compile("(?i)^anti[a-z]") _split_by_punctuation = re.compile("(\W+)") _CACHE = {} def get_cache(taxonomy_id): """Return thread-safe cache for the given taxonomy id. :param taxonomy_id: identifier of the taxonomy :type taxonomy_id: str
import invenio.legacy.bibcirculation.db_layer as db from invenio.legacy.bibcirculation.config import \ CFG_BIBCIRCULATION_WORKING_DAYS, \ CFG_BIBCIRCULATION_HOLIDAYS, \ CFG_CERN_SITE, \ CFG_BIBCIRCULATION_ITEM_STATUS_ON_LOAN, \ CFG_BIBCIRCULATION_ITEM_STATUS_ON_SHELF, \ CFG_BIBCIRCULATION_ITEM_STATUS_IN_PROCESS, \ CFG_BIBCIRCULATION_REQUEST_STATUS_PENDING, \ CFG_BIBCIRCULATION_REQUEST_STATUS_WAITING, \ CFG_BIBCIRCULATION_LOAN_STATUS_ON_LOAN, \ CFG_BIBCIRCULATION_LOAN_STATUS_EXPIRED, \ CFG_BIBCIRCULATION_LOAN_STATUS_RETURNED DICC_REGEXP = re.compile("^\{('[^']*': ?('[^']*'|\"[^\"]+\"|[0-9]*|None)(, ?'[^']*': ?('[^']*'|\"[^\"]+\"|[0-9]*|None))*)?\}$") BIBCIRCULATION_OPENER = make_invenio_opener('BibCirculation') def search_user(column, string): if string is not None: string = string.strip() if CFG_CERN_SITE == 1: if column == 'name': result = db.search_borrower_by_name(string) else: if column == 'email': try: result = db.search_borrower_by_email(string) except: result = () else:
import urllib import mimetools import intbitset from invenio.utils.url import make_invenio_opener from invenio.utils.json import json from invenio.config import CFG_SOLR_URL, \ CFG_WEBSEARCH_FULLTEXT_SNIPPETS, \ CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS if CFG_SOLR_URL: import solr SOLR_CONNECTION = solr.SolrConnection(CFG_SOLR_URL) # pylint: disable=E1101 SOLRUTILS_OPENER = make_invenio_opener('solrutils') def solr_get_facets(bitset, solr_url): facet_query_url = "%s/invenio_facets" % solr_url # now use the bitset to fetch the facet data r = urllib2.Request(facet_query_url) data = bitset.fastdump() boundary = mimetools.choose_boundary() # fool solr into thinking we're uploading a file so it will read our data as a stream contents = '--%s\r\n' % boundary contents += 'Content-Disposition: form-data; name="bitset"; filename="bitset"\r\n' contents += 'Content-Type: application/octet-stream\r\n' contents += '\r\n' + data + '\r\n' contents += '--%s--\r\n\r\n' % boundary r.add_data(contents)