def q_collection(collection_id, enrichment):
    """Queue an enrichment run for one collection.

    Prints a progress line, then asks a new ``CouchDBJobEnqueue`` to queue
    ``harvester.post_processing.enrich_existing_couch_doc.main`` for the
    collection, with ``enrichment`` as the extra positional argument.

    :param collection_id: id of the collection to enqueue
    :param enrichment: enrichment value forwarded to the queued function
    """
    # NOTE(review): the timeout's unit is not visible here (presumably
    # seconds) -- confirm against CouchDBJobEnqueue.queue_collection.
    job_timeout = 10000
    message = "ENRICH {} with {}".format(collection_id, enrichment)
    print(message)
    enqueuer = CouchDBJobEnqueue()
    enrich_func = harvester.post_processing.enrich_existing_couch_doc.main
    enqueuer.queue_collection(collection_id, job_timeout, enrich_func,
                              enrichment)
def main(user_email, cid, url_couchdb=None):
    """Queue ``harvest_image_for_doc`` for the collection ``cid``.

    :param user_email: accepted for the caller's benefit; not used here
    :param cid: collection id handed to ``queue_collection``
    :param url_couchdb: forwarded unchanged as the ``url_couchdb`` keyword
        argument (``None`` by default)
    """
    enqueuer = CouchDBJobEnqueue()
    # NOTE(review): the timeout's unit is not visible here -- confirm
    # against CouchDBJobEnqueue.queue_collection.
    job_timeout = 10000
    job_kwargs = {'url_couchdb': url_couchdb}
    enqueuer.queue_collection(cid, job_timeout, harvest_image_for_doc,
                              **job_kwargs)
Пример #3
0
def main(user_email, cid, url_couchdb=None):
    """Enqueue image harvesting for a single collection.

    Creates a ``CouchDBJobEnqueue`` and queues ``harvest_image_for_doc``
    for collection ``cid`` with a timeout value of 10000.

    :param user_email: part of the signature but never read in this function
    :param cid: id of the collection to enqueue
    :param url_couchdb: passed through as the ``url_couchdb`` keyword
    """
    enqueuer = CouchDBJobEnqueue()
    # Positional part of the call: collection id, timeout, job function.
    positional = (cid, 10000, harvest_image_for_doc)
    enqueuer.queue_collection(*positional, url_couchdb=url_couchdb)
Пример #4
0
def main(args):
    """Parse command-line arguments and queue an enrichment chain.

    :param args: sequence of argument strings passed to
        ``ArgumentParser.parse_args``; expects two positionals, the
        collection id and the enrichment chain
    """
    parser = argparse.ArgumentParser(
        description='run an Akara enrichment chain on documents in a \
                collection.')
    # The positional must be named ``collection_id``: the code below reads
    # ``args.collection_id``, and argparse only creates that attribute for
    # an argument with that name. The earlier ``document_id`` name made
    # every run fail with AttributeError right after parsing.
    parser.add_argument('collection_id',
                        help='Registry id for the collection')
    parser.add_argument('enrichment', help='enrichment chain to run')

    args = parser.parse_args(args)
    print(args.collection_id)
    print(args.enrichment)
    enq = CouchDBJobEnqueue()
    # NOTE(review): the timeout's unit is not visible here -- confirm
    # against CouchDBJobEnqueue.queue_collection.
    timeout = 10000
    enq.queue_collection(
        args.collection_id,
        timeout,
        harvester.post_processing.enrich_existing_couch_doc.main,
        args.enrichment
    )
Пример #5
0
class CouchDBJobEnqueueTestCase(TestCase):
    """Tests for CouchDBJobEnqueue using httpretty-mocked CouchDB URLs."""

    # Alternative patch target, kept for reference:
    # @patch('redis.client.Redis', autospec=True)
    @patch('harvester.post_processing.couchdb_runner.Redis')
    @httpretty.activate
    def setUp(self, mock_redis):
        """Create the enqueuer under test and a trivial job function.

        ``Redis`` in ``couchdb_runner`` is replaced by a mock while this
        method runs, and a canned HEAD response is registered for the
        CouchDB database URL before ``CouchDBJobEnqueue`` is constructed.
        """
        self.conf = config()
        self.url_couch_base = self.conf['couchdb_url']
        self.cdb = self.conf['couchdb_dbname']
        print("+++++++++++++confg:{0}".format(self.conf))
        url_head = os.path.join(self.url_couch_base, self.cdb)
        httpretty.register_uri(httpretty.HEAD,
                               url_head,
                               body='',
                               content_length='0',
                               content_type='text/plain; charset=utf-8',
                               connection='close',
                               server='CouchDB/1.5.0 (Erlang OTP/R16B03)',
                               cache_control='must-revalidate',
                               date='Mon, 24 Nov 2014 21:30:38 GMT')

        self._cdbrunner = CouchDBJobEnqueue(rq_queue='test-delete')

        def func_for_test(doc, *args, **kwargs):
            """Echo back whatever the job was called with."""
            return doc, args, kwargs

        self.function = func_for_test

    @httpretty.activate
    def testCollectionSlice(self):
        '''Test that results are correct for a known couchdb result'''
        url_to_pretty = os.path.join(self.url_couch_base, self.cdb, '_design',
                                     COUCHDB_VIEW.split('/')[0], '_view',
                                     COUCHDB_VIEW.split('/')[1])
        # Read the fixture inside a ``with`` block so the file handle is
        # closed right away instead of being left for garbage collection.
        fixture_path = DIR_FIXTURES + '/couchdb_by_provider_name-5112.json'
        with open(fixture_path) as fixture:
            fixture_body = fixture.read()
        # NOTE: transfer_encoding='chunked' doesn't work with httpretty
        httpretty.register_uri(
            httpretty.GET,
            re.compile(url_to_pretty + ".*$"),
            body=fixture_body,
            etag="2U5BW2TDDX9EHZJOO0DNE29D1",
            content_type='application/json',
        )
        results = self._cdbrunner.queue_collection('5112',
                                                   6000,
                                                   self.function,
                                                   'arg1',
                                                   'arg2',
                                                   kwarg1='1',
                                                   kwarg2=2)
        self.assertEqual(len(results), 3)
        # Each queued job gets the document id first, then the extra
        # positional arguments given to queue_collection.
        self.assertEqual(results[0].args,
                         ('5112--http://ark.cdlib.org/ark:/13030/kt7580382j',
                          'arg1', 'arg2'))
        self.assertEqual(results[0].kwargs, {'kwarg1': '1', 'kwarg2': 2})
        self.assertEqual(results[0].func_name,
                         'test.test_couchdb_runner.func_for_test')
Пример #6
0
def main(args):
    """Queue the registry-stored enrichments for one or more collections.

    Collection ids come either from ``--collection_id`` or from the file
    named by ``--cid_file`` (one id per line); exactly one of the two
    options is required. For each id the ``Collection`` is loaded from the
    registry API URL and its ``enrichments_item`` is queued.

    :param args: sequence of argument strings passed to
        ``ArgumentParser.parse_args``
    """
    parser = argparse.ArgumentParser(
        description='run the enrichments stored for a collection.')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--collection_id',
                       help='Registry id for the collection')
    group.add_argument('--cid_file',
                       help='File with collection ids for running')
    parser.add_argument('--rq_queue',
                        help='Override queue for jobs, normal-stage is default')

    args = parser.parse_args(args)
    # Fall back to the default queue when --rq_queue is absent or empty.
    queue_name = args.rq_queue or 'normal-stage'
    enq = CouchDBJobEnqueue(queue_name)
    # NOTE(review): the timeout's unit is not visible here -- confirm
    # against CouchDBJobEnqueue.queue_collection.
    timeout = 10000

    if args.collection_id:
        cids = [args.collection_id]
    else:  # cid file
        with open(args.cid_file) as cid_file:
            # Skip blank lines: an empty id would otherwise build the URL
            # ".../collection//" and be queued as a bogus collection.
            cids = [line.strip() for line in cid_file if line.strip()]
    print("CIDS:{}".format(cids))

    for cid in cids:
        url_api = ''.join(('https://registry.cdlib.org/api/v1/collection/',
                           cid, '/'))
        coll = Collection(url_api)
        print(coll.id)
        enrichments = coll.enrichments_item
        enq.queue_collection(
            cid,
            timeout,
            harvester.post_processing.enrich_existing_couch_doc.main,
            enrichments
        )
Пример #7
0
def main(args):
    """Queue an enrichment chain, read from a file, for one collection.

    Parses ``collection_id``, ``enrichment`` (a file path) and the optional
    ``--rq_queue``, echoes the two positionals, reads the whole enrichment
    file, and queues
    ``harvester.post_processing.enrich_existing_couch_doc.main`` with the
    file contents as its extra argument.

    :param args: sequence of argument strings passed to
        ``ArgumentParser.parse_args``
    """
    parser = argparse.ArgumentParser(
        description='run an Akara enrichment chain on documents in a \
                collection.')
    parser.add_argument('collection_id', help='Registry id for the collection')
    parser.add_argument('enrichment', help='File of enrichment chain to run')
    parser.add_argument(
        '--rq_queue',
        help='Override queue for jobs, normal-stage is default')

    opts = parser.parse_args(args)
    print("CID:{}".format(opts.collection_id))
    print("ENRICH FILE:{}".format(opts.enrichment))
    with open(opts.enrichment) as enrich_file:
        enrichment_chain = enrich_file.read()
    # Use the override queue only when one was actually supplied.
    queue_name = opts.rq_queue if opts.rq_queue else 'normal-stage'
    enqueuer = CouchDBJobEnqueue(queue_name)
    # NOTE(review): the unit of the 10000 timeout is not visible here --
    # confirm against CouchDBJobEnqueue.queue_collection.
    enqueuer.queue_collection(
        opts.collection_id,
        10000,
        harvester.post_processing.enrich_existing_couch_doc.main,
        enrichment_chain,
    )
Пример #8
0
class CouchDBJobEnqueueTestCase(TestCase):
    """Exercise CouchDBJobEnqueue against httpretty-mocked CouchDB URLs."""

    # Alternative patch target, kept for reference:
    # @patch('redis.client.Redis', autospec=True)
    @patch('harvester.post_processing.couchdb_runner.Redis')
    @httpretty.activate
    def setUp(self, mock_redis):
        """Prepare the enqueuer under test plus a pass-through job function.

        ``couchdb_runner.Redis`` is mocked for the duration of this method
        and a canned HEAD response is registered for the database URL
        before ``CouchDBJobEnqueue`` is instantiated.
        """
        self.conf = config()
        self.url_couch_base = self.conf['couchdb_url']
        self.cdb = self.conf['couchdb_dbname']
        # This line was tab-indented in an otherwise space-indented body,
        # which Python 3 rejects with TabError; it now uses spaces.
        print("+++++++++++++confg:{0}".format(self.conf))
        url_head = os.path.join(self.url_couch_base, self.cdb)
        httpretty.register_uri(
            httpretty.HEAD,
            url_head,
            body='',
            content_length='0',
            content_type='text/plain; charset=utf-8',
            connection='close',
            server='CouchDB/1.5.0 (Erlang OTP/R16B03)',
            cache_control='must-revalidate',
            date='Mon, 24 Nov 2014 21:30:38 GMT'
        )

        self._cdbrunner = CouchDBJobEnqueue(rq_queue='test-delete')

        def func_for_test(doc, *args, **kwargs):
            """Return the call arguments unchanged."""
            return doc, args, kwargs

        self.function = func_for_test

    @httpretty.activate
    def testCollectionSlice(self):
        '''Test that results are correct for a known couchdb result'''
        url_to_pretty = os.path.join(self.url_couch_base, self.cdb,
                                     '_design', COUCHDB_VIEW.split('/')[0],
                                     '_view', COUCHDB_VIEW.split('/')[1])
        # Load the fixture through a context manager so the file handle is
        # closed instead of leaking until garbage collection.
        with open(DIR_FIXTURES +
                  '/couchdb_by_provider_name-5112.json') as fixture:
            body = fixture.read()
        # NOTE: transfer_encoding='chunked' doesn't work with httpretty
        httpretty.register_uri(
            httpretty.GET,
            re.compile(url_to_pretty + ".*$"),
            body=body,
            etag="2U5BW2TDDX9EHZJOO0DNE29D1",
            content_type='application/json',
        )
        results = self._cdbrunner.queue_collection(
            '5112', 6000, self.function,
            'arg1', 'arg2', kwarg1='1', kwarg2=2)
        self.assertEqual(len(results), 3)
        # The first job argument is the document id; the extra positional
        # arguments follow it.
        self.assertEqual(
            results[0].args,
            ('5112--http://ark.cdlib.org/ark:/13030/kt7580382j',
             'arg1', 'arg2'))
        self.assertEqual(results[0].kwargs, {'kwarg1': '1', 'kwarg2': 2})
        self.assertEqual(results[0].func_name,
                         'test.test_couchdb_runner.func_for_test')