Exemplo n.º 1
0
 def setUp(self):
     """
     Replace the Celery app held by ``tasks`` with an app named 'test'.

     PROJ_HOME and GROBID_SERVICE are read from the configuration of the
     app currently installed on ``tasks``; that app is remembered in
     ``self._app`` before being swapped out.
     """
     unittest.TestCase.setUp(self)

     current_app = tasks.app
     self.proj_home = current_app.conf['PROJ_HOME']
     self.grobid_service = current_app.conf['GROBID_SERVICE']
     self._app = current_app

     # No configuration overrides are supplied for this test app.
     self.app = app.ADSFulltextCelery(
         'test',
         proj_home=self.proj_home,
         local_config={})

     # Monkey-patch the module so code reading tasks.app sees the test app.
     tasks.app = self.app
Exemplo n.º 2
0
 def setUp(self):
     """
     Replace the Celery app held by ``tasks`` with an app named 'test'.

     PROJ_HOME and GROBID_SERVICE are read from the configuration of the
     app currently installed on ``tasks``; that app is remembered in
     ``self._app`` before being swapped out. The test app is created with
     both Celery eager-mode settings explicitly set to False.
     """
     unittest.TestCase.setUp(self)

     current_app = tasks.app
     self.proj_home = current_app.conf['PROJ_HOME']
     self.grobid_service = current_app.conf['GROBID_SERVICE']
     self._app = current_app

     overrides = {
         "CELERY_ALWAYS_EAGER": False,
         "CELERY_EAGER_PROPAGATES_EXCEPTIONS": False,
     }
     self.app = app.ADSFulltextCelery(
         'test',
         proj_home=self.proj_home,
         local_config=overrides)

     # Monkey-patch the module so code reading tasks.app sees the test app.
     tasks.app = self.app
Exemplo n.º 3
0
    def setUp(self):
        """
        Install a Celery app named 'test' on ``tasks`` and build the link
        files for the 'integration' test set.

        The project root is resolved as two directories above this file.
        The test app points FULLTEXT_EXTRACT_PATH at the unit-test stub data
        and uses the current app's CELERY_BROKER value with '_test' appended.
        The app that was installed beforehand is kept in ``self._app``.

        :return: no return
        """
        unittest.TestCase.setUp(self)

        this_dir = os.path.dirname(__file__)
        self.proj_home = os.path.realpath(os.path.join(this_dir, '../..'))
        self._app = tasks.app

        test_config = {
            'FULLTEXT_EXTRACT_PATH': os.path.join(self.proj_home, 'tests/test_unit/stub_data'),
            'CELERY_BROKER': tasks.app.conf['CELERY_BROKER'] + '_test',
        }
        self.app = app.ADSFulltextCelery(
            'test',
            proj_home=self.proj_home,
            local_config=test_config)

        # Monkey-patch the module so code reading tasks.app sees the test app.
        tasks.app = self.app

        # Build the link files
        build_links(test_name='integration')

        self.meta_path = ''
        self.channel_list = None
Exemplo n.º 4
0
    def setUp(self):
        """
        Install a Celery app named 'test' on ``tasks``, build the link files
        for the 'integration' test set, and record the stub file paths.

        The project root is resolved as two directories above this file.
        The test app points FULLTEXT_EXTRACT_PATH at the unit-test stub data
        and uses the current app's CELERY_BROKER value with '_test' appended.
        All ``self.test_*`` paths are joined onto the PROJ_HOME value found
        in the test app's configuration.
        """
        unittest.TestCase.setUp(self)

        this_dir = os.path.dirname(__file__)
        self.proj_home = os.path.realpath(os.path.join(this_dir, '../..'))
        self._app = tasks.app

        test_config = {
            'FULLTEXT_EXTRACT_PATH': os.path.join(self.proj_home, 'tests/test_unit/stub_data'),
            'CELERY_BROKER': tasks.app.conf['CELERY_BROKER'] + '_test',
        }
        self.app = app.ADSFulltextCelery(
            'test',
            proj_home=self.proj_home,
            local_config=test_config)

        # Monkey-patch the module so code reading tasks.app sees the test app.
        tasks.app = self.app

        build_links(test_name='integration')

        root = self.app.conf['PROJ_HOME']

        def under_root(relative_path):
            # Anchor a repository-relative path at the configured project home.
            return os.path.join(root, relative_path)

        # Link files used by the integration tests.
        self.test_file = under_root('tests/test_integration/stub_data/fulltext.links')
        self.test_file_stub = under_root('tests/test_integration/stub_data/fulltext_stub.links')
        self.test_file_wrong = under_root('tests/test_integration/stub_data/fulltext_wrong.links')
        self.test_file_exists = under_root('tests/test_integration/stub_data/fulltext_exists.links')
        self.test_single_document = under_root(
            'tests/test_integration/stub_data/fulltext_single_document.links')

        # Stub documents from the unit-test data directory.
        self.test_stub_xml = under_root('tests/test_unit/stub_data/test.xml')
        self.test_stub_nonstandard_xml = under_root('tests/test_unit/stub_data/test_failure.xml')
        self.test_stub_exml = under_root('tests/test_unit/stub_data/test_elsevier.xml')
        self.test_stub_teixml = under_root(
            'tests/test_unit/stub_data/test.astro-ph-0002105.tei.xml')
        self.test_stub_iso8859 = under_root(
            'tests/test_unit/stub_data/test.stmp_2_1_014010.iop.xml')

        # The same XML stub path twice, comma-separated.
        xml_stub = under_root('tests/test_unit/stub_data/test.xml')
        self.test_multi_file = ','.join((xml_stub, xml_stub))

        self.test_stub_html = under_root('tests/test_unit/stub_data/test.html')
        self.test_stub_html_table = under_root('tests/test_unit/stub_data/test_table.html')
        self.test_stub_text = under_root('tests/test_unit/stub_data/test.txt')
        self.test_stub_ocr = under_root('tests/test_unit/stub_data/test.ocr')

        # Link file used by the functional tests.
        self.test_functional_stub = under_root(
            'tests/test_functional/stub_data/fulltext_functional_tests.links')
Exemplo n.º 5
0
if sys.version_info > (3, ):
    from builtins import zip
    from builtins import str
from adsputils import get_date, exceptions
import adsft.app as app_module
from kombu import Queue
from adsft import extraction, checker, writer, reader, ner
from adsmsg import FulltextUpdate
import os
from adsft.utils import TextCleaner

# ============================= INITIALIZATION ==================================== #

# Project root: the parent of the directory that contains this module.
_module_dir = os.path.dirname(__file__)
proj_home = os.path.realpath(os.path.join(_module_dir, '../'))

# A module-level ``local_config``, if one exists in globals(), is forwarded
# to the app; otherwise an empty dict is used.
_local_config = globals().get('local_config', {})
app = app_module.ADSFulltextCelery(
    'ads-fulltext',
    proj_home=proj_home,
    local_config=_local_config)
logger = app.logger

# Every queue is bound to the app's exchange with a routing key equal to
# the queue's own name.
app.conf.CELERY_QUEUES = tuple(
    Queue(queue_name, app.exchange, routing_key=queue_name)
    for queue_name in (
        'check-if-extract',
        'extract',
        'extract-grobid',
        'output-results',
        'facility-ner',
    )
)

# The two facility NER models are loaded once, at import time, from the
# locations named in the app configuration.
logger.debug("Loading spacy models for facilities...")
model1 = ner.load_model(app.conf['NER_FACILITY_MODEL_ACK'])
model2 = ner.load_model(app.conf['NER_FACILITY_MODEL_FT'])
Exemplo n.º 6
0
from __future__ import absolute_import, unicode_literals
from adsputils import get_date, exceptions
import adsft.app as app_module
from kombu import Queue
from adsft import extraction, checker, writer
from adsmsg import FulltextUpdate
import os
from adsft.utils import TextCleaner

# ============================= INITIALIZATION ==================================== #

# Project root: the parent of the directory that contains this module.
_module_dir = os.path.dirname(__file__)
proj_home = os.path.realpath(os.path.join(_module_dir, '../'))

app = app_module.ADSFulltextCelery('ads-fulltext', proj_home=proj_home)
logger = app.logger

# Every queue is bound to the app's exchange with a routing key equal to
# the queue's own name.
app.conf.CELERY_QUEUES = tuple(
    Queue(queue_name, app.exchange, routing_key=queue_name)
    for queue_name in (
        'check-if-extract',
        'extract',
        'extract-grobid',
        'output-results',
    )
)

# ============================= TASKS ============================================= #


@app.task(queue='check-if-extract')
def task_check_if_extract(message):
    """
    Checks if the file needs to be extracted and pushes to the correct
    extraction queue.
    """