Exemplo n.º 1
0
    def setUpClass(cls) -> None:
        """Create the schema and seed the fixtures shared by the test class.

        The ``browse.services.database`` module is bound to a mocked
        application configured with ``DATABASE_URL``, all tables are created,
        two member institutions with their IP ranges are inserted, and every
        ``*.sql`` file under ``data/db/sql`` is executed against the engine.
        """
        from browse.services import database

        cls.database_service = database
        db = database.db

        # A MagicMock stands in for the application object; only the
        # attributes assigned below are set explicitly.
        fake_app = mock.MagicMock()
        fake_app.config = {
            'SQLALCHEMY_DATABASE_URI': DATABASE_URL,
            'SQLALCHEMY_TRACK_MODIFICATIONS': False
        }
        fake_app.extensions = {}
        fake_app.root_path = ''

        db.init_app(fake_app)
        db.app = fake_app
        db.create_all()
        db.session.commit()

        models = database.models
        session = db.session

        # --- Cornell: one included range with a small excluded sub-range ---
        cornell = models.MemberInstitution(
            id=1, name='Cornell University', label='Cornell University')
        session.add(cornell)

        # 128.84.0.0 - 128.84.255.255, included
        cornell_range = models.MemberInstitutionIP(
            id=1, sid=cornell.id,
            start=2152988672, end=2153054207,
            exclude=0)
        session.add(cornell_range)

        # 128.84.10.1 - 128.84.10.10, excluded
        cornell_excluded_range = models.MemberInstitutionIP(
            id=2, sid=cornell.id,
            start=2152991233, end=2152991242,
            exclude=1)
        session.add(cornell_excluded_range)

        # --- A second institution whose range overlaps Cornell's exclusion ---
        other = models.MemberInstitution(
            id=2, name='Other University', label='Other University')
        session.add(other)

        # 128.84.10.4 - 128.84.10.10, included
        other_range = models.MemberInstitutionIP(
            id=3, sid=other.id,
            start=2152991236, end=2152991242,
            exclude=0)
        session.add(other_range)
        session.commit()

        # Load the remaining fixture data from the raw SQL dumps.
        sql_dir = path_of_for_test('data/db/sql')
        sql_files: List[str] = glob.glob(f'{sql_dir}/*.sql')
        execute_sql_files(sql_files, db.engine)
        session.commit()
Exemplo n.º 2
0
 def test_split_long_author_list(self):
     """Split the 1411.4413 author list at size 20 and check its shape.

     The result must be a 3-tuple made of two lists followed by an int,
     and the second list must be non-empty for this paper.
     """
     f1 = path_of_for_test(
         'data/abs_files/ftp/arxiv/papers/1411/1411.4413.abs')
     meta: metadata = AbsMetaSession.parse_abs_file(filename=f1)
     alst = split_long_author_list(queries_for_authors(str(meta.authors)),
                                   20)
     # Exact-type checks are deliberate: identity on the type object rejects
     # subclasses (e.g. bool for int), which assertIsInstance would accept.
     self.assertIs(type(alst), tuple)
     # Compare the length by value; identity comparison of ints only works
     # by accident of the interpreter caching small integers.
     self.assertEqual(len(alst), 3)
     self.assertIs(type(alst[0]), list)
     self.assertIs(type(alst[1]), list)
     self.assertGreater(len(alst[1]), 0)
     self.assertIs(type(alst[2]), int)
Exemplo n.º 3
0
    def test_split_with_collaboration(self):
        """Check tokenising and query building for a collaboration byline.

        The authors field of 0808.4142 is first split into raw tokens and
        then converted into author queries; both results are compared with
        the exact expected lists.
        """
        expected_tokens = [
            'D0 Collaboration', ':', 'V. Abazov', ',', 'et al']
        expected_queries = [
            ('D0 Collaboration', 'D0 Collaboration'), ': ',
            ('V. Abazov', 'Abazov, V'), ', ', 'et al']

        abs_path = path_of_for_test(
            'data/abs_files/ftp/arxiv/papers/0808/0808.4142.abs')
        meta: metadata = AbsMetaSession.parse_abs_file(filename=abs_path)

        # Raw tokens: names and the separators between them.
        tokens = split_authors(str(meta.authors))
        self.assertListEqual(tokens, expected_tokens)

        # Queries: (display, query) pairs interleaved with separator strings.
        queries = queries_for_authors(str(meta.authors))
        self.assertListEqual(queries, expected_queries)
Exemplo n.º 4
0
    def test_all_trackback_pings(self) -> None:
        """Test if all trackback pings are counted.

        The expected total is the count ``grep_f_count`` reports for the
        ``INSERT INTO`` marker in the trackback-ping SQL fixture. Both the
        service's counting call and the length of its full listing must
        equal that total.
        """
        doc_sql_file = path_of_for_test(
            'data/db/sql/arXiv_trackback_pings.sql')
        count_from_file = grep_f_count(
            doc_sql_file, '''INSERT INTO `arXiv_trackback_pings`''')

        # Guard first: without a count from the fixture there is nothing to
        # compare against, so fail before touching the database.
        self.assertIsNotNone(count_from_file,
                             'count of trackback pings is defined')

        service = TestBrowseDatabaseService.database_service
        count_from_db: int = service.count_all_trackback_pings()
        count_from_db_list: int = len(service.get_all_trackback_pings())

        self.assertEqual(count_from_db, count_from_file,
                         'Count of all trackback pings are correct')
        self.assertEqual(count_from_db_list, count_from_file,
                         'All trackback pings are returned')
Exemplo n.º 5
0
    def test_split_strange_author_list(self):
        """Test odd author list that shows '0 additional authors' ARXIVNG-2083.

        Splitting the 1902.05884 author list at size 100 must return a
        3-tuple of (list, list, int) in which the first list holds 101
        tuple entries, the second list is empty and the int is 0.
        """
        f1 = path_of_for_test(
            'data/abs_files/ftp/arxiv/papers/1902/1902.05884.abs')
        meta: metadata = AbsMetaSession.parse_abs_file(filename=f1)
        alst = split_long_author_list(queries_for_authors(str(meta.authors)),
                                      100)

        # Exact-type checks are deliberate: identity on the type object
        # rejects subclasses, which assertIsInstance would accept.
        self.assertIs(type(alst), tuple)
        # Compare the length by value; identity comparison of ints only
        # works by accident of the interpreter caching small integers.
        self.assertEqual(len(alst), 3)

        self.assertIs(type(alst[0]), list)
        self.assertIs(type(alst[1]), list)
        self.assertIs(type(alst[2]), int)

        # Only tuple entries are counted; the other items in the first list
        # are skipped.
        tuple_entries = sum(
            1 for item in alst[0] if isinstance(item, tuple))
        self.assertEqual(tuple_entries, 101)

        self.assertEqual(len(alst[1]), 0,
                         "Back list on 1902.05884 should be empty")
        self.assertEqual(alst[2], 0,
                         "Back list size on 1902.05884 should be empty")
Exemplo n.º 6
0
"""Tests for arXiv abstract (.abs) parser."""
import os
from unittest import TestCase
from datetime import datetime
from dateutil.tz import tzutc
from browse.domain.metadata import DocMetadata, Submitter, SourceType, \
    VersionEntry
from browse.services.document.metadata import AbsMetaSession
from tests import path_of_for_test

# Root of the .abs fixture tree that the bulk-parsing test walks.
ABS_FILES = path_of_for_test('data/abs_files')


class TestAbsParser(TestCase):
    """Exercise the .abs metadata parser against the fixture files."""

    def test_bulk_parsing(self):
        """Parse every non-empty ``.abs`` file found under ``ABS_FILES``.

        Each parsed result must be a ``DocMetadata`` instance. The test also
        sanity-checks the ``From:`` line pattern against a sample string.
        """
        from_regex = r'(?m)From:\s+[^<]+<[^@]+@[^>]+>'
        self.assertRegex('From: J Doe <*****@*****.**>', from_regex)

        num_files_tested = 0
        for dir_name, _subdirs, file_names in os.walk(ABS_FILES):
            for file_name in file_names:
                abs_path = os.path.join(dir_name, file_name)
                # The size check comes first, so every file is stat'ed
                # before its extension is looked at; empty files and
                # files without the .abs suffix are both skipped.
                is_empty = os.stat(abs_path).st_size == 0
                if is_empty or not abs_path.endswith('.abs'):
                    continue
                num_files_tested += 1
                parsed = AbsMetaSession.parse_abs_file(filename=abs_path)
                self.assertIsInstance(parsed, DocMetadata)
Exemplo n.º 7
0
"""Tests for Google Scholar metadata tag creation. """

import html
import json
import os
import pprint
from unittest import TestCase
from tests import path_of_for_test

from browse.domain.metadata import DocMetadata
from browse.services.util.metatags import meta_tag_metadata
from browse.services.document.metadata import AbsMetaSession
from app import app

# Path of a JSON fixture; judging by its name it holds the Google Scholar
# meta tags produced by the classic system -- NOTE(review): confirm.
CLASSIC_RESULTS_FILE = path_of_for_test(
    'data/classic_scholar_metadata_tags.json')
# Root of the .abs fixture tree.
ABS_FILES = path_of_for_test('data/abs_files')


class TestAgainstClassicResults(TestCase):
    """Test google scholar metadata created from abs files against classic exresults. """
    def setUp(self):
        """Put the shared application into testing mode for each test.

        Sets the application root and server name in the app config, then
        exposes the app on the test instance as ``self.app``.
        """
        config_overrides = {
            'APPLICATION_ROOT': '',
            'SERVER_NAME': 'dev.arxiv.org',
        }
        app.testing = True
        for key, value in config_overrides.items():
            app.config[key] = value
        self.app = app

    def test_same_as_classic(self):

        bad_data = [
            '1501.00001v1',
Exemplo n.º 8
0
 def test_collaboration_at_front(self):
     """Parse the raw 0808.4142 authors whose byline opens with a collaboration.

     The parsed author/affiliation rows must equal the expected list
     exactly: the collaboration row first, then the row for Abazov.
     """
     expected_rows = [['D0 Collaboration', '', ''], ['Abazov', 'V.', '']]
     abs_path = path_of_for_test(
         'data/abs_files/ftp/arxiv/papers/0808/0808.4142.abs')
     parsed_meta = AbsMetaSession.parse_abs_file(filename=abs_path)
     author_rows = parse_author_affil(parsed_meta.authors.raw)
     self.assertListEqual(author_rows, expected_rows)