Example #1
def regenerate(args, update_catalog=False, mongodb=None):

    if settings_module.parse_boolean(os.environ.get('MAKETESTS', '0')):
        DESTPATH = os.path.join(tempfile.mkdtemp(),
                                'experiment_reference.json')
    else:
        DESTPATH = os.path.join(os.getcwd(), 'datacatalog', 'definitions',
                                'jsondocs', 'experiment_reference.json')
        update_catalog = True

    logger.debug('Project config: ' + PARENT + '/config.yml')
    project_settings = config.read_config(places_list=[PARENT])
    logger.debug('Local config: ' + THIS + '/config.yml')
    bootstrap_settings = config.read_config(places_list=[THIS])
    settings = dicthelpers.data_merge(project_settings, bootstrap_settings)

    env = args.environment
    if env is None:
        env = 'development'
    db = settings.get(env)

    schema = {
        'description': 'Experiment reference enumeration',
        'type': 'string',
        'enum': []
    }

    challenges = ChallengeMapping(settings['experiment_reference'],
                                  settings['google_client'])
    challenges.populate()
    for cp in challenges.filescache:
        if cp.get('uri', None) is not None:
            google_sheets_id = os.path.basename(cp.get('uri', None))
            cp_uuid = identifiers.typeduuid.catalog_uuid(
                cp.get('id'), 'challenge_problem')
            cp_settings = copy.deepcopy(settings['experiment_reference'])
            cp_settings['google_sheets_id'] = google_sheets_id

            # Generate the experiment designs for each CP
            mapping = ExperimentReferenceMapping(cp_settings,
                                                 settings['google_client'])
            mapping.populate()
            if update_catalog:
                if mongodb is None:
                    mongodb = db['mongodb']
                store = linkedstores.experiment_design.ExperimentDesignStore(
                    mongodb)
                for doc in mapping.filescache:
                    # print(doc)
                    if doc['experiment_design_id'] != 'Unknown':
                        doc['child_of'].append(cp_uuid)
                    logger.info('SYNCING {}'.format(doc.get('title', None)))
                    store.add_update_document(doc)

            for rec in mapping.filescache:
                if rec['experiment_design_id'] not in schema['enum']:
                    schema['enum'].append(rec['experiment_design_id'])

    with open(DESTPATH, 'w') as schemafile:
        json.dump(schema, schemafile, indent=2)
    return True
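A minimal driver for regenerate() above; the argparse wiring is an assumption, since the function only reads args.environment:

import argparse

# Hypothetical entry point; regenerate() only consumes args.environment,
# so a bare namespace with that attribute is enough for a smoke test.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--environment', default=None,
                        help='settings section to load (e.g. development)')
    regenerate(parser.parse_args())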
Example #2
def main(args):

    logger.debug('Project config: ' + PARENT + '/config.yml')
    project_settings = config.read_config(places_list=[PARENT])
    logger.debug('Local config: ' + THIS + '/config.yml')
    bootstrap_settings = config.read_config(places_list=[THIS])
    settings = dicthelpers.data_merge(project_settings, bootstrap_settings)

    env = args.environment
    if env is None:
        env = 'localhost'
    settings['verbose'] = bool(args.verbose)

    mongodb = settings.get(env).get('mongodb')

    if args.command == 'list':
        dblist(mongodb, settings)
    elif args.command == 'auto':
        autobuild(mongodb, settings)
    elif args.command == 'create':
        raise NotImplementedError()
    elif args.command == 'delete':
        raise NotImplementedError()
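The if/elif chain above can also be written as a dispatch table, which keeps the command list in one place; a sketch, assuming dblist and autobuild keep the (mongodb, settings) signature used here:

# Sketch: table-driven equivalent of the if/elif dispatch above.
COMMANDS = {'list': dblist, 'auto': autobuild}

def dispatch(command, mongodb, settings):
    handler = COMMANDS.get(command)
    if handler is None:
        raise NotImplementedError(command)
    return handler(mongodb, settings)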
Example #3
def main(args):

    logger.debug('Project config: ' + PARENT + '/config.yml')
    project_settings = config.read_config(places_list=[PARENT])
    logger.debug('Local config: ' + THIS + '/config.yml')
    bootstrap_settings = config.read_config(places_list=[THIS])
    settings = dicthelpers.data_merge(project_settings, bootstrap_settings)

    env = args.environment
    if env is None:
        env = 'localhost'

    settings['verbose'] = bool(args.verbose)

    mongodb = settings.get(env).get('mongodb')
    mongodb_uri = mongo.get_mongo_uri(mongodb)
    logger.debug('URI: {}'.format(mongodb_uri))
    if args.database is not None:
        database_name = args.database
    else:
        database_name = settings.get(env).get('mongodb',
                                              {}).get('database', None)
    logger.debug('DB: {}'.format(database_name))

    myclient = MongoClient(mongodb_uri)
    idb = myclient[database_name]

    if args.command == 'discover':
        autodiscover(idb, settings)
    elif args.command == 'auto':
        autobuild(idb, settings)
    elif args.command == 'create':
        raise NotImplementedError()
    elif args.command == 'delete':
        raise NotImplementedError()
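mongo.get_mongo_uri() is not shown in these examples; a plausible minimal equivalent, assuming the mongodb settings block carries host, port, and optional username/password keys as used above (the real helper may differ):

from urllib.parse import quote_plus

# Hypothetical stand-in for mongo.get_mongo_uri(); illustrative only.
def get_mongo_uri(conf):
    auth = ''
    if conf.get('username'):
        auth = '{}:{}@'.format(quote_plus(conf['username']),
                               quote_plus(conf.get('password', '')))
    return 'mongodb://{}{}:{}'.format(auth, conf['host'], conf['port'])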
Example #4
def regenerate(args, update_catalog=False, mongodb=None):

    if settings_module.parse_boolean(os.environ.get('MAKETESTS', '0')):
        DESTPATH = os.path.join(tempfile.mkdtemp(),
                                'challenge_problem_id.json')
    else:
        DESTPATH = os.path.join(os.getcwd(), 'datacatalog', 'definitions',
                                'jsondocs', 'challenge_problem_id.json')
        update_catalog = True

    logger.debug('Project config: ' + PARENT + '/config.yml')
    project_settings = config.read_config(places_list=[PARENT])
    logger.debug('Local config: ' + THIS + '/config.yml')
    bootstrap_settings = config.read_config(places_list=[THIS])
    settings = dicthelpers.data_merge(project_settings, bootstrap_settings)

    env = args.environment
    if env is None:
        env = 'development'
    db = settings.get(env)

    mapping = ChallengeMapping(settings['experiment_reference'],
                               settings['google_client'])
    mapping.populate()

    # Experiment records: insert into experiment_reference collection
    # FIXME - we don't know which challenge_problem they are children of
    schemadef = mapping.generate_schema_definitions()
    with open(DESTPATH, 'w') as schemafile:
        json.dump(schemadef, schemafile, indent=2)

    if update_catalog:
        if mongodb is None:
            mongodb = db['mongodb']
        store = linkedstores.challenge_problem.ChallengeStore(mongodb)
        for doc in mapping.filescache:
            logger.info('SYNCING {}'.format(doc.get('title', None)))
            store.add_update_document(doc)

    return True
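Both regenerate() variants branch on settings_module.parse_boolean(os.environ.get('MAKETESTS', '0')). That helper is not shown; a minimal sketch under the usual truthy-string convention:

# Hypothetical equivalent of settings_module.parse_boolean().
def parse_boolean(value):
    """Treat '1', 'true', 'yes', and 'on' (any case) as True."""
    return str(value).strip().lower() in ('1', 'true', 'yes', 'on')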
Example #5
def main(args):

    logger.debug('Project config: ' + PARENT + '/config.yml')
    project_settings = config.read_config(places_list=[PARENT])
    logger.debug('Local config: ' + THIS + '/config.yml')
    bootstrap_settings = config.read_config(places_list=[THIS])
    settings = dicthelpers.data_merge(project_settings, bootstrap_settings)


    env = args.environment
    if env is None:
        env = 'localhost'
    settings['verbose'] = bool(args.verbose)

    mongodb = settings.get(env).get('mongodb')
    mongodb_root = {
        'host': mongodb['host'],
        'port': mongodb['port'],
        'username': '******',
        'password': mongodb['root_password']
    }
    mongodb_uri = mongo.get_mongo_uri(mongodb_root)
    logger.debug('MongoDB: {}'.format(mongodb_uri))
    myclient = MongoClient(mongodb_uri)
    database_name = mongodb.get('database', args.database)

    if database_name is not None:
        logger.info('Ensuring existence of {}'.format(database_name))
        # MongoDB creates databases lazily; writing a document forces the
        # database into existence
        myclient[database_name]['_keep'].insert_one(
            {'note': 'database provisioned'})
        roles = [{'role': 'dbOwner', 'db': database_name}]
        try:
            myclient['admin'].command("createUser",
                                      mongodb['username'],
                                      pwd=mongodb['password'],
                                      roles=roles)
        except OperationFailure:
            myclient['admin'].command("updateUser",
                                      mongodb['username'],
                                      pwd=mongodb['password'],
                                      roles=roles)
        except Exception as opf:
            logger.warning(opf)
        try:
            myclient[database_name].command("createUser",
                                            mongodb['username'],
                                            pwd=mongodb['password'],
                                            roles=roles)
        except OperationFailure:
            myclient[database_name].command("updateUser",
                                            mongodb['username'],
                                            pwd=mongodb['password'],
                                            roles=roles)
        except Exception as opf:
            logger.warning(opf)
    else:
        raise Exception(
            'Failed to find database name in config or command line options')
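The createUser-then-updateUser fallback above appears twice and can be factored into one helper; a sketch using the same pymongo database.command() calls:

from pymongo.errors import OperationFailure

# Sketch: create a MongoDB user, falling back to an update if it exists.
def upsert_user(db, username, password, roles):
    try:
        db.command('createUser', username, pwd=password, roles=roles)
    except OperationFailure:
        db.command('updateUser', username, pwd=password, roles=roles)

# e.g. upsert_user(myclient['admin'], mongodb['username'],
#                  mongodb['password'], roles)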
Example #6
import os
import sys

# config and AttrDict are assumed to be provided by the project's bootstrap
# helpers, as in the other examples
ENVIRONMENT = os.environ.get('DB_ENV', 'localhost')

HERE = os.getcwd()
SELF = __file__
THIS = os.path.dirname(SELF)
PARENT = os.path.dirname(THIS)
GPARENT = os.path.dirname(PARENT)

# Prefer the local checkout of datacatalog over any installed copy
if HERE not in sys.path:
    sys.path.insert(0, HERE)
from datacatalog.identifiers import abaco
from datacatalog import dicthelpers

project_settings = config.read_config(places_list=[PARENT])
bootstrap_settings = config.read_config(places_list=[THIS])
settings = dicthelpers.data_merge(project_settings, bootstrap_settings)

settings = AttrDict({
    'mongodb': settings.get(ENVIRONMENT, {}).get('mongodb'),
    'pipelines': {'pipeline_uuid': '106c46ff-8186-5756-a934-071f4497b58d',
                  'pipeline_manager_id': abaco.actorid.mock(),
                  'pipeline_manager_nonce': abaco.nonceid.mock(),
                  'job_manager_id': abaco.actorid.mock(),
                  'job_manager_nonce': abaco.nonceid.mock(),
                  'job_indexer_id': abaco.actorid.mock(),
                  'job_indexer_nonce': abaco.nonceid.mock()}
})
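Assuming AttrDict behaves like the attrdict package (keys readable as attributes), the merged settings can then be read either way:

# Assumes attrdict-style behavior; both forms return the same value.
print(settings.pipelines['pipeline_uuid'])
print(settings['pipelines']['pipeline_uuid'])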
Example #7
def main(args):

    logger.setLevel(logging.DEBUG)

    def get_v1_items(job_filter=None):
        """Returns a cursor of v1 job documents matching an optional filter"""
        return v1_stores['pipelinejob'].find(filter=job_filter)

    def get_v2_items(job_filter=None):
        """Returns a cursor of v2 job documents matching an optional filter"""
        return v2_stores['pipelinejob'].find(filter=job_filter)

    settings = config.read_config()
    mongodb_v2 = settings.get('mongodb')
    mongodb_v1 = copy.copy(mongodb_v2)
    # Make overridable
    mongodb_v1['database'] = 'catalog'
    db1 = datacatalog.mongo.db_connection(mongodb_v1)
    v1_stores = dict()
    v1_stores['pipeline'] = db1['pipelines']
    v1_stores['pipelinejob'] = db1['jobs']

    v2_stores = dict()
    v2_stores['pipeline'] = datacatalog.linkedstores.pipeline.PipelineStore(mongodb_v2)
    v2_stores['pipelinejob'] = datacatalog.linkedstores.pipelinejob.PipelineJobStore(mongodb_v2)

    jobs = get_v1_items()
    jc = 0
    logger.info('Jobs found: %s', jobs.count())

    for job in jobs:

        job_doc = dict()
        jc = jc + 1
        logger.debug('Processing job %s', jc)
        # Lift over UUID; read the source values outside the try blocks so
        # the log statements in the handlers can't hit an unbound name
        ouuid = str(job.get('uuid'))
        try:
            nuuid = typeduuid.catalog_uuid_from_v1_uuid(ouuid, uuid_type='pipelinejob')
        except Exception:
            logger.critical('Unable to translate %s. Skipping.', ouuid)
            continue

        opuuid = str(job.get('pipeline_uuid'))
        try:
            npuuid = typeduuid.catalog_uuid_from_v1_uuid(opuuid, uuid_type='pipeline')
        except Exception:
            logger.critical('Unable to translate %s. Skipping.', opuuid)
            continue

        logger.info('UUID %s remapped to %s', ouuid, nuuid)

        # Don't overwrite previously migrated jobs
        if v2_stores['pipelinejob'].coll.find_one({'uuid': nuuid}) is not None:
            logger.critical('Destination job exists. Skipping.')
            continue

        job_doc['uuid'] = nuuid
        job_doc['archive_path'] = os.path.join('/', job['path'])
        job_doc['archive_system'] = 'data-sd2e-community'
        job_doc['session'] = job.get('session',
                                     interestinganimal.generate(
                                         timestamp=False))
        job_doc['updated'] = job.get('updated')
        job_doc['state'] = job.get('status', 'CREATED')
        job_doc['last_event'] = job.get('last_event', 'update').lower()
        job_doc['pipeline_uuid'] = npuuid
        # Linkages
        job_doc['generated_by'] = [npuuid]
        job_doc['child_of'] = list()
        job_doc['derived_from'] = list()

        # Agent/task
        if 'actor_id' in job:
            job_doc['agent'] = 'https://api.sd2e.org/actors/v2/' + job.get('actor_id')
        else:
            job_doc['agent'] = 'https://api.sd2e.org/actors/v2/MEzqaw4rkWZoK'
        job_doc['task'] = None

        # Lift over top-level data
        old_data = job.get('data', dict())
        new_data = dict()

        # Lift over parameters
        # Also establish derived_from params
        # Copy once, outside the loop, so every lifted key is removed
        # rather than only the last one matched
        old_data_filtered = copy.deepcopy(old_data)
        for oldkey, newkey, uuid_type in [
                ('sample_id', 'sample_id', 'sample'),
                ('experiment_reference', 'experiment_design_id', 'experiment'),
                ('measurement_id', 'measurement_id', 'measurement')]:
            if oldkey in old_data:
                new_data[newkey] = old_data[oldkey]
                old_data_filtered.pop(oldkey)
                value_uuid = typeduuid.catalog_uuid(
                    old_data[oldkey], uuid_type=uuid_type)
                job_doc['derived_from'].append(value_uuid)

        # Merge lifted data and other data fields
        new_data = data_merge(old_data_filtered, new_data)
        if new_data is None:
            new_data = dict()
        job_doc['data'] = new_data

        # Port job history
        v2_history = list()
        for v1_event in job.get('history', []):
            v2_name = list(v1_event.keys())[0]
            v2_event = {'date': v1_event.get(v2_name, {}).get('date'),
                        'data': v1_event.get(v2_name, {}).get('data', dict()),
                        'name': v2_name.lower(),
                        'uuid': typeduuid.generate(
                            uuid_type='pipelinejob_event',
                            binary=False)}
            if v2_event['data'] is None:
                v2_event['data'] = dict()
            v2_history.append(v2_event)
        v2_history = sorted(v2_history, key=lambda k: k['date'])
        job_doc['history'] = v2_history

        # Set system-managed keys
        job_doc = v2_stores['pipelinejob'].set_private_keys(
            job_doc, source=SELF)

        if args.verbose:
            pprint(job_doc)

        resp = v2_stores['pipelinejob'].coll.insert_one(job_doc)
        logger.debug('Inserted document {}'.format(
            resp.inserted_id))
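The parameter liftover above is easy to get wrong (the original deep-copied old_data inside the loop, discarding earlier pops); the fixed logic can be checked in isolation with plain dicts, leaving out the UUID side effects:

import copy

# Standalone check of the key-liftover logic: copy once, then remove every
# lifted key from the filtered dict.
def lift_over(old_data, key_map):
    filtered = copy.deepcopy(old_data)
    lifted = {}
    for oldkey, newkey in key_map:
        if oldkey in old_data:
            lifted[newkey] = old_data[oldkey]
            filtered.pop(oldkey)
    return filtered, lifted

filtered, lifted = lift_over(
    {'sample_id': 's1', 'experiment_reference': 'e1', 'other': 'x'},
    [('sample_id', 'sample_id'),
     ('experiment_reference', 'experiment_design_id')])
assert lifted == {'sample_id': 's1', 'experiment_design_id': 'e1'}
assert filtered == {'other': 'x'}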