def test_paper_id_is_not_set(self, mock_request):
    """Log output shows a null paper ID when no ``paperid`` is supplied."""
    mock_request.environ = {'REQUEST_ID': 'foo-id-1234'}

    buffer = StringIO()
    log = logging.getLogger('foologger', buffer)
    log.error('what')

    # Grab the text first: the buffer contents are gone once it is closed.
    output = buffer.getvalue()
    buffer.close()

    self.assertIn(
        'arxiv:null',
        output,
        "Paper ID should be null in log messages",
    )
def test_get_logger_with_request(self, mock_request):
    """With a request context, the request ID shows up in log output."""
    mock_request.environ = {'REQUEST_ID': 'foo-id-1234'}

    buffer = StringIO()
    log = logging.getLogger('foologger', buffer)
    self.assertIsInstance(
        log,
        pyLogging.Logger,
        "Should return a logging.Logger instance",
    )

    log.error('foo')

    # Grab the text first: the buffer contents are gone once it is closed.
    output = buffer.getvalue()
    buffer.close()

    self.assertIn(
        'foo-id-1234',
        output,
        "Should include request ID in log messages",
    )
def test_config_sets_loglevel(self, mock_get_config, mock_request):
    """A ``LOGLEVEL`` of 10 in the config lets DEBUG messages through."""
    mock_request.environ = {'REQUEST_ID': 'foo-id-1234'}
    mock_get_config.return_value = {'LOGLEVEL': 10}

    buffer = StringIO()
    log = logging.getLogger('foologger', buffer)
    log.debug('foo')

    # Grab the text first: the buffer contents are gone once it is closed.
    output = buffer.getvalue()
    buffer.close()

    failure_message = (
        "Changing LOGLEVEL in the app config should change the"
        " logger log level"
    )
    self.assertIn('DEBUG', output, failure_message)
def test_get_logger_no_app_nor_request(self):
    """Logging still works with neither an app nor a request context."""
    buffer = StringIO()
    log = logging.getLogger('foologger', buffer)
    self.assertIsInstance(
        log,
        pyLogging.Logger,
        "Should return a logging.Logger instance",
    )

    log.error('foo')

    # Grab the text first: the buffer contents are gone once it is closed.
    output = buffer.getvalue()
    buffer.close()

    self.assertIn(
        'ERROR: "foo"',
        output,
        "Should log normally even if request is not present",
    )
from string import punctuation from elasticsearch_dsl import Search, Q, SF from arxiv.base import logging from search.domain import SimpleQuery, Query, AdvancedQuery, Classification, \ ClassificationList from .util import strip_tex, Q_, is_tex_query, is_literal_query, escape, \ wildcard_escape, remove_single_characters, has_wildcard, is_old_papernum, \ parse_date, parse_date_partial from .highlighting import HIGHLIGHT_TAG_OPEN, HIGHLIGHT_TAG_CLOSE from .authors import author_query, author_id_query, orcid_query logger = logging.getLogger(__name__) START_YEAR = 1991 END_YEAR = datetime.now().year def _query_title(term: str, default_operator: str = 'AND') -> Q: if is_tex_query(term): return Q("match", **{f'title.tex': {'query': term}}) fields = ['title.english'] if is_literal_query(term): fields += ['title'] return Q("query_string", fields=fields, default_operator=default_operator, allow_leading_wildcard=False,
DataRequired from http import HTTPStatus as status from arxiv.taxonomy import CATEGORIES_ACTIVE as CATEGORIES from arxiv.taxonomy import ARCHIVES_ACTIVE as ARCHIVES from arxiv.base import logging, alerts from arxiv.forms import csrf from arxiv.users.domain import Session from arxiv.submission import save, RequestCrossList, Submission from arxiv.submission.exceptions import SaveError from ..util import load_submission from .util import user_and_client_from_session, OptGroupSelectField, \ validate_command logger = logging.getLogger(__name__) # pylint: disable=C0103 Response = Tuple[Dict[str, Any], int, Dict[str, Any]] # pylint: disable=C0103 CONTACT_SUPPORT = Markup( 'If you continue to experience problems, please contact' ' <a href="mailto:[email protected]"> arXiv support</a>.' ) class HiddenListField(HiddenField): def process_formdata(self, valuelist): self.data = list(str(x) for x in valuelist if x) def process_data(self, value):
QueryError, IndexConnectionError, DocumentNotFound, IndexingError, OutsideAllowedRange, MappingError, ) from search.services.index.util import MAX_RESULTS from search.services.index.advanced import advanced_search from search.services.index.simple import simple_search from search.services.index.api import api_search from search.services.index.classic_api import classic_search from search.services.index import highlighting from search.services.index import results logger = logging.getLogger(__name__) # Disable the Elasticsearch logger. When enabled, the Elasticsearch logger # dumps entire Tracebacks prior to propagating exceptions. Thus we end up with # tracebacks in the logs even for handled exceptions. logging.getLogger("elasticsearch").disabled = True ALL_SEARCH_FIELDS = [ "author", "title", "abstract", "comments", "journal_ref", "acm_class", "msc_class", "report_num",
# Evaluation script: for every PDF named in ``evaluation/referenceCounts.csv``
# that exists under ``evaluation/pdfs``, run the reference extractors and
# print, per extractor, the number of references found next to the CSV's
# ``N`` value for that PDF.
import sys
# Put the current directory on the import path before the project imports
# below (presumably so ``references`` resolves when run from the repo root).
sys.path.append('.')

from unittest import mock
from references.process import extract
from references.process.merge import align, arbitrate, beliefs, normalize, \
    priors

import os
from pprint import pprint
import csv

from arxiv.base import logging
# 40 is the numeric value of the standard ERROR level; this quiets the
# extractor's logger while the evaluation runs.
logging.getLogger('references.process.extract').setLevel(40)

basepath = os.path.abspath('evaluation/pdfs')

if __name__ == '__main__':
    # The csv module asks for files to be opened with newline='' so that
    # newlines embedded in quoted fields are handled by the reader itself.
    with open('evaluation/referenceCounts.csv', newline='') as f:
        raw = list(csv.reader(f))

    # The first row holds the column names. It is used only as the key set
    # and is no longer turned into a (bogus) data record of its own.
    header = raw[0] if raw else []
    # Keep only rows with exactly one value per column; malformed or blank
    # rows are dropped.
    referenceCounts = [
        dict(zip(header, row))
        for row in raw[1:]
        if len(row) == len(header)
    ]

    for row in referenceCounts:
        full_path = os.path.join(basepath, row['pdf'])
        if not os.path.exists(full_path):
            # PDF listed in the CSV but not available locally: skip it.
            continue

        # Drop the last four characters of the file name (presumably the
        # ``.pdf`` extension) to obtain the document ID.
        document_id = row['pdf'][:-4]
        print('Extracting %s' % document_id)
        extractions = extract.extract(full_path, document_id)
        for extractor, refs in extractions.items():
            print(extractor, len(refs), row['N'])