def test_zip(self):
    """ It should generate a zip file containing the specified contents """
    from zipfile import ZipFile
    from occams.celery import Session
    from occams_datastore import models as datastore
    from occams_studies import models, tasks
    from occams_studies.exports.pid import PidPlan

    owner = datastore.User(key=u'joe')
    Session.info['blame'] = owner
    Session.add(owner)
    Session.flush()

    export = models.Export(
        owner_user=owner,
        contents=[{'name': 'pid', 'title': 'PID', 'versions': []}],
        status='complete')
    Session.add(export)
    Session.flush()

    tasks.app.settings['studies.export.plans'] = [PidPlan]

    tasks.make_export(export.name)

    # @in_transaction removes the session metadata, so we gotta do this
    export = Session.merge(export)

    with ZipFile(export.path, 'r') as zfp:
        file_names = zfp.namelist()

    assert sorted(['pid.csv', 'codebook.csv']) == sorted(file_names)
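# A hedged sketch of the export plan interface the test above relies on,
# inferred from how make_export (below) consumes plans such as PidPlan;
# any name not seen elsewhere in this file is an assumption, not the real
# occams_studies API.
class ExamplePlan(object):
    name = 'example'
    title = 'Example'
    file_name = 'example.csv'

    def data(self, use_choice_labels=False, expand_collections=False):
        """Return the rows for exports.write_data to serialize."""
        raise NotImplementedError

    def codebook(self):
        """Yield codebook rows describing this plan's columns."""
        raise NotImplementedError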
def on_failure(self, exc, task_id, args, kwargs, einfo):
    """Celery failure hook: mark the export failed and broadcast the status."""
    log.error('Task {0} raised exception: {1!r}\n{2!r}'.format(
        task_id, exc, einfo))
    export = Session.query(models.Export).filter_by(name=task_id).one()
    export.status = u'failed'
    redis = app.redis
    redis.hset(export.redis_key, 'status', export.status)
    redis.publish('export', json.dumps(redis.hgetall(export.redis_key)))
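# A minimal sketch, assuming stock Celery, of how a handler like on_failure
# above gets wired in: define it on a Task subclass and pass that class as
# the task's base. The app, class, and task names here are hypothetical.
from celery import Celery, Task

sketch_app = Celery('sketch')


class ExportTask(Task):

    def on_failure(self, exc, task_id, args, kwargs, einfo):
        # Celery invokes this hook after the task body raises; einfo wraps
        # the traceback as an ExceptionInfo
        print('Task {0} failed: {1!r}'.format(task_id, exc))


@sketch_app.task(base=ExportTask, bind=True)
def failing_task(self):
    raise RuntimeError('boom')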
def celery(request):
    """
    (Function Testing) Sets up a celery application for testing

    :param request: The pytest context
    """
    import shutil
    import tempfile
    import mock
    from redis import StrictRedis
    from sqlalchemy import create_engine
    from occams.celery import Session
    from occams_datastore import models as datastore
    from occams_studies import tasks

    settings = {
        'studies.export.dir': tempfile.mkdtemp(),
        'celery.blame': USERID
    }

    tasks.app.userid = settings['celery.blame']
    tasks.app.redis = StrictRedis.from_url(REDIS_URL)
    tasks.app.settings = settings

    db_url = request.config.getoption('--db')
    engine = create_engine(db_url)
    Session.configure(bind=engine, info={'settings': settings})

    Session.add(datastore.User(key=settings['celery.blame']))
    Session.flush()

    # Stub out Session.commit so task code cannot end the test transaction
    commitmock = mock.patch('occams_imports.tasks.Session.commit')
    commitmock.start()

    def cleanup():
        commitmock.stop()
        shutil.rmtree(settings['studies.export.dir'])
        Session.remove()

    request.addfinalizer(cleanup)
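# A hedged usage sketch: once the fixture above is registered (e.g. with
# @pytest.fixture in conftest.py), a test requests it by parameter name and
# pytest runs the setup and the cleanup finalizer. The assertion is an
# assumption for illustration.
def test_uses_celery_fixture(celery):
    from occams.celery import Session
    from occams_datastore import models as datastore

    # the blame user created by the fixture is visible in the scoped session
    assert Session.query(datastore.User).count() >= 1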
def make_export(name):
    """
    Handles generating exports in a separate process.

    Because the export is handled in a different process, this method can
    only accept the name of the entry. This is to avoid race conditions
    (http://docs.celeryproject.org/en/latest/userguide/tasks.html#state).

    All progress will be broadcast to the redis **export** channel with the
    following dictionary:
        export_id -- the export being processed
        owner_user -- the user who this export belongs to
        count -- the current number of files processed
        total -- the total number of files that will be processed
        status -- current status of the export

    Parameters:
        name -- the name of the export to process
    """
    redis = app.redis

    export = Session.query(models.Export).filter_by(name=name).one()

    redis.hmset(export.redis_key, {
        'export_id': export.id,
        'owner_user': export.owner_user.key,
        'status': export.status,
        'count': 0,
        'total': len(export.contents),
    })

    with closing(ZipFile(export.path, 'w', ZIP_DEFLATED)) as zfp:
        exportables = exports.list_all(Session)

        for item in export.contents:
            plan = exportables[item['name']]

            with tempfile.NamedTemporaryFile() as tfp:
                exports.write_data(tfp, plan.data(
                    use_choice_labels=export.use_choice_labels,
                    expand_collections=export.expand_collections))
                zfp.write(tfp.name, plan.file_name)

            redis.hincrby(export.redis_key, 'count')
            data = redis.hgetall(export.redis_key)
            # redis-py returns everything as a string, so clean the counters
            for key in ('export_id', 'count', 'total'):
                data[key] = int(data[key])
            redis.publish('export', json.dumps(data))

            count, total = data['count'], data['total']
            log.info(', '.join(map(str, [count, total, item['name']])))

        with tempfile.NamedTemporaryFile() as tfp:
            codebook_chain = [
                p.codebook() for p in six.itervalues(exportables)]
            exports.write_codebook(tfp, chain.from_iterable(codebook_chain))
            zfp.write(tfp.name, exports.codebook.FILE_NAME)

    export.status = 'complete'
    redis.hmset(export.redis_key, {
        'status': export.status,
        'file_size': humanize.naturalsize(export.file_size)
    })
    redis.publish('export', json.dumps(redis.hgetall(export.redis_key)))
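# A minimal sketch, assuming redis-py, of a client consuming the progress
# messages make_export publishes on the 'export' channel; the URL default
# and the exit condition are assumptions.
import json

from redis import StrictRedis


def watch_export_progress(redis_url='redis://localhost:6379/0'):
    redis = StrictRedis.from_url(redis_url)
    pubsub = redis.pubsub()
    pubsub.subscribe('export')
    for message in pubsub.listen():
        if message['type'] != 'message':
            continue  # skip subscribe confirmations
        data = json.loads(message['data'])
        print('{count}/{total} files for export {export_id}'.format(**data))
        if data.get('status') == 'complete':
            break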
def apply_direct_mappings(task):
    # get all the direct mappings for processing
    mappings = (
        Session.query(models.Mapping)
        .filter_by(type=u'direct')
        .all()
    )

    default_state = (
        Session.query(datastore.State)
        .filter_by(name='pending-entry')
        .one()
    )

    redis = app.redis

    total_mappings = len(mappings)
    count = 0
    mappings_id = six.text_type(uuid.uuid4())

    redis.hmset(mappings_id, {
        'count': count,
        'total': total_mappings
    })

    for mapping in mappings:
        source_schema_name = mapping.logic['source_schema']
        source_schema_publish_date = mapping.logic['source_schema_publish_date']
        source_variable = mapping.logic['source_variable']
        target_schema_name = mapping.logic['target_schema']
        target_schema_publish_date = mapping.logic['target_schema_publish_date']
        target_variable = mapping.logic['target_variable']

        # get records that have a matching schema for the source schema
        records = (
            Session.query(models.SiteData)
            .filter(
                models.SiteData.data['form_name'].astext ==
                source_schema_name
            )
            .filter(
                models.SiteData.data['form_publish_date'].astext ==
                source_schema_publish_date
            )
            .all()
        )

        for record in records:
            pid = record.data['pid']
            collect_date = record.data['collect_date']

            patient = (
                Session.query(studies.Patient)
                .filter_by(pid=pid)
                .one()
            )

            target_schema = (
                Session.query(datastore.Schema)
                .filter_by(name=target_schema_name)
                .filter_by(publish_date=target_schema_publish_date)
                .one()
            )

            # if the target schema already exists we want to add data
            # to the schema rather than creating a new entity
            entity_exists = False
            for item in patient.entities:
                if item.schema.name == target_schema_name and \
                        item.schema.publish_date.isoformat() == \
                        target_schema_publish_date:
                    entity = item
                    entity_exists = True
                    break

            if not entity_exists:
                entity = datastore.Entity(
                    schema=target_schema,
                    collect_date=collect_date,
                    state=default_state
                )
                patient.entities.add(entity)

            # choices and non-choices mappings currently take the same
            # path; TODO: handle records with no value to map
            source_key = record.data.get(source_variable)
            payload = {target_variable: source_key}
            upload_dir = tempfile.mkdtemp()
            apply_data(Session, entity, payload, upload_dir)
            shutil.rmtree(upload_dir)

        redis.hincrby(mappings_id, 'count')
        data = redis.hgetall(mappings_id)
        # redis-py returns everything as a string, so clean the counters
        for key in ('count', 'total'):
            data[key] = int(data[key])
        redis.publish('direct', json.dumps(data))
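# A hedged sketch of the missing-value handling the TODO above calls for;
# skipping the record when the source variable is absent is one reasonable
# policy, but this helper and its policy are assumptions.
import logging


def extract_source_value(record_data, source_variable):
    """Return the value to map, or None (after logging) when it is absent."""
    value = record_data.get(source_variable)
    if value is None:
        logging.getLogger(__name__).warning(
            'no value for %r in record; skipping', source_variable)
    return value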