Example #1
def sync(Lochness, subject, dry=False):
    logger.debug(f'exploring {subject.study}/{subject.id}')

    for alias, daris_uids in iter(subject.daris.items()):
        Keyring = Lochness['keyring'][alias]
        token = Keyring['TOKEN']
        url = Keyring['URL']
        project_cid = Keyring['PROJECT_CID']

        for daris_uid in daris_uids:
            dirname = tree.get('mri',
                               subject.protected_folder,
                               processed=False,
                               BIDS=Lochness['BIDS'])
            metadata_dst_dir = tree.get('mri',
                                        subject.protected_folder,
                                        processed=True,
                                        BIDS=Lochness['BIDS'])
            metadata_dst = Path(metadata_dst_dir) / f'{daris_uid}_metadata.csv'

            dst_zipfile = os.path.join(dirname, 'tmp.zip')
            timestamp_loc = os.path.join(dirname, '.latest_pull_timestamp')

            # load the time of the latest data pull from DaRIS,
            # estimated from the mtime of the previously downloaded zip file
            if Path(timestamp_loc).is_file():
                latest_pull_mtime = load_latest_pull_timestamp(timestamp_loc)
            else:
                latest_pull_mtime = 0

            if not dry:
                tmpdir = tf.mkdtemp(dir=dirname, prefix='.')
                os.chmod(tmpdir, 0o0755)

                # execute the curl command
                logger.info(f'Downloading from DaRIS for {daris_uid}')
                daris_download(daris_uid, latest_pull_mtime, token,
                               project_cid, url, dst_zipfile)

                with zipfile.ZipFile(dst_zipfile, 'r') as zip_ref:
                    zip_ref.extractall(tmpdir)

                nfiles_in_dirs = []
                for root, dirs, files in os.walk(tmpdir):
                    for directory in dirs:
                        os.chmod(os.path.join(root, directory), 0o0755)
                    for f in files:
                        os.chmod(os.path.join(root, f), 0o0755)
                    nfiles_in_dirs.append(len(files))

                # if any new file was downloaded, save the timestamp
                if any([x > 1 for x in nfiles_in_dirs]):
                    logger.info(f'New MRI file downloaded for {daris_uid}')
                    save_latest_pull_timestamp(dst_zipfile, timestamp_loc)

                    # write metadata in the processed folder
                    collect_all_daris_metadata(tmpdir, metadata_dst)

                shutil.copytree(tmpdir, dirname, dirs_exist_ok=True)
                os.remove(dst_zipfile)
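
The timestamp mechanism above relies on two helpers that are not shown on this page. A minimal sketch of what they could look like, assuming the mtime-of-the-downloaded-zip scheme described in the comments (the real load_latest_pull_timestamp and save_latest_pull_timestamp may differ):

import os

def load_latest_pull_timestamp(timestamp_loc):
    # read back the epoch time recorded at the last successful pull
    with open(timestamp_loc) as f:
        return float(f.read().strip())

def save_latest_pull_timestamp(dst_zipfile, timestamp_loc):
    # record the mtime of the downloaded zip as the latest-pull marker
    with open(timestamp_loc, 'w') as f:
        f.write(str(os.path.getmtime(dst_zipfile)))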
Example #2
def sync(Lochness, subject, dry=False, datatypes=None):
    logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
    hdd_root = Lochness['coombs']['hdd_root']
    general_folder = os.path.join(Lochness['phoenix_root'], 'GENERAL')
    protected_folder = os.path.join(Lochness['phoenix_root'], 'PROTECTED')
    # PHOENIX/
    hdd_phoenix_dir = os.path.join(hdd_root, 'PHOENIX')
    if not os.path.exists(hdd_phoenix_dir):
        raise HDDError('directory not found {0}'.format(hdd_phoenix_dir))
    # PHOENIX/{datatype}
    for datatype in hdd.listdir(hdd_phoenix_dir, IGNORE):
        if datatypes and datatype not in datatypes:
            continue
        datatype_dir = os.path.join(hdd_phoenix_dir, datatype)
        # PHOENIX/{datatype}/{study}
        for study in hdd.listdir(datatype_dir, IGNORE):
            phoenix_study_dir = os.path.join(general_folder, study)
            phoenix_sids = lochness.listdir(Lochness, phoenix_study_dir)
            study_dir = os.path.join(datatype_dir, study)
            # PHOENIX/{datatype}/{study}/{sid}
            for sid in hdd.listdir(study_dir, IGNORE):
                if sid not in phoenix_sids:
                    logger.warn('subject not in PHOENIX {0}/{1}'.format(
                        study, sid))
                    continue
                src = os.path.join(study_dir, sid)
                _dst = tree.get(datatype,
                                general_folder,
                                subject,
                                makedirs=False)
                ssh.makedirs(Lochness, _dst)
                dst = '{0}@{1}:{2}'.format(Lochness['ssh_user'],
                                           Lochness['ssh_host'], _dst)
                hdd.rsync(src, dst, dry=dry)
        input('hit enter to continue to next datatype')
Example #3
def sync(Lochness, subject, dry):
    delete = lochness.dropbox.delete_on_success(Lochness, Basename)
    logger.debug('delete_on_success for {0} is {1}'.format(Basename, delete))
    for dbx_sid in subject.dropbox[Module]:
        logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = cryptease.kdf(_passphrase)
        api_token = keyring.dropbox_api_token(Lochness, Module)
        client = dropbox.Dropbox(api_token)
        for datatype, products in iter(CONFIG.items()):
            dbx_head = os.path.join(os.sep, datatype, subject.study, dbx_sid)
            dbx_head_len = len(dbx_head)
            logger.debug('walking %s', dbx_head)
            for root, dirs, files in lochness.dropbox.walk(client, dbx_head):
                for f in files:
                    dbx_tail = os.path.join(root,
                                            f)[dbx_head_len:].lstrip(os.sep)
                    dbx_file = (dbx_head, dbx_tail)
                    product = _find_product(dbx_tail,
                                            products,
                                            subject=dbx_sid)
                    if not product:
                        continue
                    protect = product.get('protect', False)
                    compress = product.get('compress', False)
                    key = enc_key if protect else None
                    output_base = subject.protected_folder if protect else subject.general_folder
                    output_base = tree.get(datatype, output_base)
                    lochness.dropbox.save(client,
                                          dbx_file,
                                          output_base,
                                          key=key,
                                          compress=compress,
                                          delete=delete,
                                          dry=dry)
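
This example depends on a _find_product helper that is not shown. A possible sketch, assuming each product entry in CONFIG carries a glob-style 'pattern' with a {subject} placeholder (both are assumptions; the real helper may match differently):

from fnmatch import fnmatch

def _find_product(tail, products, subject=None):
    # return the first product whose pattern matches the path below the
    # Dropbox head, or None when nothing matches
    for product in products:
        pattern = product['pattern'].format(subject=subject)
        if fnmatch(tail, pattern):
            return product
    return None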
Example #4
def sync(Lochness, subject, dry):
    delete = lochness.dropbox.delete_on_success(Lochness, Basename)
    logger.debug('delete_on_success for {0} is {1}'.format(Basename, delete))
    for dbx_sid in subject.dropbox[Module]:
        logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = cryptease.kdf(_passphrase)
        api_token = keyring.dropbox_api_token(Lochness, Module)
        client = dropbox.Dropbox(api_token)
        patterns = _batch_compile(CONFIG, dbx_sid)
        for category, datatype in _iterate(CONFIG):
            output_base = subject.protected_folder if category == 'PROTECTED' else subject.general_folder
            output_base = tree.get(datatype, output_base)
            dbx_head = os.path.join(os.sep, datatype, subject.study)
            # shim the dropbox head for certain data types
            if datatype == 'onsite_interview':
                dbx_head = os.path.join(dbx_head, 'output')
            elif datatype == 'behav_qc':
                dbx_head = os.path.join(dbx_head, dbx_sid)
            dbx_head_len = len(dbx_head)
            for root, dirs, files in lochness.dropbox.walk(client, dbx_head):
                for f in files:
                    dbx_tail = os.path.join(root, f)[dbx_head_len:].lstrip(os.sep)
                    dbx_file = (dbx_head, dbx_tail)
                    if patterns[datatype].match(dbx_tail):
                        key = enc_key if category == 'PROTECTED' else None
                        lochness.dropbox.save(client, dbx_file, output_base,
                                              key=key, delete=delete, dry=dry)
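
The _batch_compile call above is also not shown. A sketch under the assumption that CONFIG maps categories to {datatype: regex} entries with a {subject} placeholder (hypothetical; the real structure may differ):

import re

def _batch_compile(config, subject_id):
    # compile one regular expression per datatype, with the subject id
    # substituted into the configured pattern
    patterns = {}
    for category, datatypes in config.items():
        for datatype, pattern in datatypes.items():
            patterns[datatype] = re.compile(pattern.format(subject=subject_id))
    return patterns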
Example #5
def sync(Lochness, subject, dry=False):
    logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
    for alias, icognition_ids in iter(subject.icognition.items()):
        base, user, password = credentials(Lochness, alias)
        with requests.Session() as s:
            for label in icognition_ids:
                # login
                url = '{0}/admin/application/login/login.php'.format(base)
                data = {'username': user, 'password': password}
                r = s.post(url, data=data, verify=False)
                if r.status_code != requests.codes.OK:
                    raise iCognitionError('login url {0} responded {1}'.format(
                        r.url, r.status_code))
                # get subject data
                params = {'id': label}
                url = '{0}/admin/application/download/csvdata.php'.format(base)
                r = s.get(url, params=params, stream=True, verify=False)
                if r.status_code == requests.codes.NOT_FOUND:
                    logger.info('no icognition data for label={0}'.format(
                        label))
                    continue
                if r.status_code != requests.codes.OK:
                    raise iCognitionError('data url {0} responded {1}'.format(
                        r.url, r.status_code))
                # get the filename to save from content-disposition header
                if 'content-disposition' not in r.headers:
                    raise iCognitionError(
                        'no content-disposition in response from url {0}'.
                        format(r.url))
                fname = re.findall('filename="(.+)"',
                                   r.headers['content-disposition'])
                if len(fname) != 1 or not fname[0]:
                    raise iCognitionError(
                        'filename expected in content-disposition: {0}'.format(
                            fname))
                fname = fname[0]
                # verify response content integrity
                content = r.content
                content_len = r.raw._fp_bytes_read  # the number of bytes read before any decoding
                if 'content-length' not in r.headers:
                    logger.warn(
                        'server did not return a content-length header, can\'t verify response integrity'
                    )
                else:
                    expected_len = int(r.headers['content-length'])
                    if content_len != expected_len:
                        raise iCognitionError(
                            'content length {0} does not match expected length {1}'
                            .format(content_len, expected_len))
                # save the file atomically
                dst = tree.get('cogassess',
                               subject.general_folder,
                               BIDS=Lochness['BIDS'])
                dst = os.path.join(dst, fname)
                if os.path.exists(dst):
                    return
                logger.debug(
                    'saving icognition response content to {0}'.format(dst))
                if not dry:
                    lochness.atomic_write(dst, content)
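
The "save the file atomically" step calls lochness.atomic_write. A minimal sketch of the write-to-temp-then-rename idea (the project's actual implementation may differ):

import os
import tempfile

def atomic_write(dst, content):
    # write to a temporary file in the same directory, then rename it into
    # place so readers never observe a partially written file
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(dst), prefix='.')
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(content)
        os.rename(tmp, dst)
    except Exception:
        os.unlink(tmp)
        raise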
Example #6
def sync(Lochness, subject, dry=False):
    logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
    for alias, onlinescoring_ids in iter(subject.icognition.items()):
        base, user, password = credentials(Lochness, 'onlinescoring')
        with requests.Session() as s:

            # login
            url = '{0}/ajax/login.php'.format(base)
            data = {'username': user, 'password': password}
            r = s.post(url, data=data, verify=False)
            if r.status_code != requests.codes.OK:
                raise OnlineScoringError('login url {0} responded {1}'.format(
                    r.url, r.status_code))
            # get subject data
            params = {'final': 'true', 'format': 'csv', 'label': subject.id}
            url = '{0}/ajax/subjectdata.php'.format(base)
            r = s.get(url, params=params, stream=True, verify=False)
            if r.status_code != requests.codes.OK:
                raise OnlineScoringError('data url {0} responded {1}'.format(
                    r.url, r.status_code))
            # get the filename to save from content-disposition header
            if 'content-disposition' not in r.headers:
                message = r.json()
                if 'success' in message and message['success'] == 0:
                    logger.info('no onlinescoring data for label={0}'.format(
                        subject.id))
                    continue
                raise OnlineScoringError(
                    'no content-disposition response header for {0}'.format(
                        r.url))
            fname = re.findall('filename=(.+)',
                               r.headers['content-disposition'])
            if len(fname) != 1 or not fname[0]:
                raise OnlineScoringError(
                    'filename expected in content-disposition: {0}'.format(
                        fname))
            fname = fname[0]
            # verify response content integrity
            content = r.content
            content_len = r.raw._fp_bytes_read  # the number of bytes read before any decoding
            if 'content-length' not in r.headers:
                logger.warn(
                    'server did not return a content-length header, can\'t verify response integrity'
                )
            else:
                expected_len = int(r.headers['content-length'])
                if content_len != expected_len:
                    raise OnlineScoringError(
                        'content length {0} does not match expected length {1}'
                        .format(content_len, expected_len))
            # save the file atomically
            dst = tree.get('retroquest', subject.general_folder)
            dst = os.path.join(dst, fname)
            if os.path.exists(dst):
                return
            logger.debug(
                'saving onlinescoring response content to {0}'.format(dst))
            if not dry:
                lochness.atomic_write(dst, content)
Example #7
def sync(Lochness, subject, dry=False):
    logger.debug(f'exploring {subject.study}/{subject.id}')

    # for each subject
    subject_id = subject.id
    study_name = subject.study
    study_rpms = Lochness['keyring'][f'rpms.{study_name}']
    rpms_root_path = study_rpms['RPMS_PATH']

    # source data
    all_df_dict = get_rpms_database(rpms_root_path)
    subject_df_dict = get_subject_data(all_df_dict, subject)

    for measure, source_df in subject_df_dict.items():
        # target data
        dirname = tree.get('surveys',
                           subject.protected_folder,
                           processed=False,
                           BIDS=Lochness['BIDS'])
        target_df_loc = Path(dirname) / f"{subject_id}_{measure}.csv"

        proc_folder = tree.get('surveys',
                               subject.general_folder,
                               processed=True,
                               BIDS=Lochness['BIDS'])
        proc_dst = Path(proc_folder) / f"{subject_id}_{measure}.csv"

        # load the time of the latest data pull,
        # estimated from the mtime of the previously saved CSV
        if Path(target_df_loc).is_file():
            latest_pull_mtime = target_df_loc.stat().st_mtime
        else:
            latest_pull_mtime = 0

        # if last_modified date > latest_pull_mtime, pull the data
        if not source_df['last_modified'].max() > latest_pull_mtime:
            print('No new updates')
            break

        if not dry:
            Path(dirname).mkdir(exist_ok=True)
            os.chmod(dirname, 0o0755)
            source_df.to_csv(target_df_loc, index=False)
            os.chmod(target_df_loc, 0o0755)
            process_and_copy_db(Lochness, subject, target_df_loc, proc_dst)
Example #8
def sync(Lochness, subject, dry=False):
    logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
    deidentify = deidentify_flag(Lochness, subject.study)
    logger.debug('deidentify for study {0} is {1}'.format(subject.study, deidentify))
    for redcap_instance,redcap_subject in iterate(subject):
        for redcap_project,api_url,api_key in redcap_projects(Lochness, subject.study, redcap_instance):
            _debug_tup = (redcap_instance, redcap_project, redcap_subject)
            record_query = {
                'token': api_key,
                'content' : 'record',
                'format': 'json',
                'records': redcap_subject
            }
            if deidentify:
                # get fields that aren't identifiable and narrow record query by field name
                metadata_query = {
                    'token': api_key,
                    'content' : 'metadata',
                    'format': 'json'
                }
                content = post_to_redcap(api_url, metadata_query, _debug_tup)
                metadata = json.loads(content)
                field_names = []
                for field in metadata:
                    if field['identifier'] != 'y':
                        field_names.append(field['field_name'])
                record_query['fields'] = ','.join(field_names)
            # post query to redcap
            content = post_to_redcap(api_url, record_query, _debug_tup)
            # check if response body is nothing but a sad empty array
            if content.strip() == '[]':
                logger.info('no redcap data for {0}'.format(redcap_subject))
                continue
            # process the response content
            _redcap_project = re.sub(r'[\W]+', '_', redcap_project.strip())
            dst_folder = tree.get('surveys', subject.general_folder)
            fname = '{0}.{1}.json'.format(redcap_subject, _redcap_project)
            dst = os.path.join(dst_folder, fname)
            if not dry:
                if not os.path.exists(dst):
                    logger.debug('saving {0}'.format(dst))
                    lochness.atomic_write(dst, content)
                else:
                    # responses are not stored atomically in redcap
                    crc_src = lochness.crc32(content.decode('utf-8'))
                    crc_dst = lochness.crc32file(dst)
                    if crc_dst != crc_src:
                        logger.warn('file has changed {0}'.format(dst))
                        lochness.backup(dst)
                        logger.debug('saving {0}'.format(dst))
                        lochness.atomic_write(dst, content)
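
post_to_redcap wraps the REDCap API call. A sketch using requests, assuming the helper simply posts the query dict and returns the raw response body (the error handling shown is hypothetical):

import requests

def post_to_redcap(api_url, data, debug_tup):
    # POST the query to the REDCap API and return the raw response body
    r = requests.post(api_url, data=data)
    if r.status_code != requests.codes.OK:
        raise Exception('redcap url {0} responded {1} ({2})'.format(
            r.url, r.status_code, debug_tup))
    return r.content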
Example #9
def sync(Lochness, subject, dry=False):
    logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))

    for alias, xnat_uids in iter(subject.xnat.items()):
        Keyring = Lochness['keyring'][alias]
        auth = yaxil.XnatAuth(url=Keyring['URL'],
                              username=Keyring['USERNAME'],
                              password=Keyring['PASSWORD'])
        for xnat_uid in xnat_uids:
            for experiment in experiments(auth, xnat_uid):
                logger.info(experiment)
                dirname = tree.get('mri',
                                   subject.protected_folder,
                                   processed=False,
                                   BIDS=Lochness['BIDS'])
                dst = os.path.join(dirname, experiment.label)
                if os.path.exists(dst):
                    try:
                        check_consistency(dst, experiment)
                        continue
                    except ConsistencyError as e:
                        logger.warn(e)
                        message = 'A conflict was detected in study {0}'.format(
                            subject.study)
                        lochness.notify(Lochness, message, study=subject.study)
                        #lochness.backup(dst)
                        continue
                message = 'downloading {PROJECT}/{LABEL} to {FOLDER}'
                logger.debug(
                    message.format(PROJECT=experiment.project,
                                   LABEL=experiment.label,
                                   FOLDER=dst))
                if not dry:
                    tmpdir = tf.mkdtemp(dir=dirname, prefix='.')
                    os.chmod(tmpdir, 0o0755)
                    yaxil.download(auth,
                                   experiment.label,
                                   project=experiment.project,
                                   scan_ids=['ALL'],
                                   out_dir=tmpdir,
                                   in_mem=False,
                                   attempts=3)
                    logger.debug('saving .experiment file')
                    save_experiment_file(tmpdir, auth.url, experiment)
                    os.rename(tmpdir, dst)
Example #10
def sync(Lochness, subject, dry=False):

    # load dataframe for redcap data entry trigger
    db_df = get_data_entry_trigger_df(Lochness)

    logger.debug(f'exploring {subject.study}/{subject.id}')
    deidentify = deidentify_flag(Lochness, subject.study)

    logger.debug(f'deidentify for study {subject.study} is {deidentify}')

    for redcap_instance, redcap_subject in iterate(subject):
        for redcap_project, api_url, api_key in redcap_projects(
                Lochness, subject.study, redcap_instance):
            # process the response content
            _redcap_project = re.sub(r'[\W]+', '_', redcap_project.strip())

            # default location to protected folder
            dst_folder = tree.get('surveys',
                                  subject.protected_folder,
                                  processed=False,
                                  BIDS=Lochness['BIDS'])
            fname = f'{redcap_subject}.{_redcap_project}.json'
            dst = Path(dst_folder) / fname

            # PII processed content to general processed
            proc_folder = tree.get('surveys',
                                   subject.general_folder,
                                   processed=True,
                                   BIDS=Lochness['BIDS'])

            proc_dst = Path(proc_folder) / fname

            # check if the data has been updated by checking the redcap data
            # entry trigger db
            if dst.is_file():
                if check_if_modified(redcap_subject, dst, db_df):
                    pass  # if modified, carry on
                else:
                    print("\n----")
                    print("No updates - not downloading REDCap data")
                    print("----\n")
                    break  # if not modified break

            print("\n----")
            print("Downloading REDCap data")
            print("----\n")
            _debug_tup = (redcap_instance, redcap_project, redcap_subject)

            record_query = {
                'token': api_key,
                'content': 'record',
                'format': 'json',
                'records': redcap_subject
            }

            if deidentify:
                # get fields that aren't identifiable and narrow record query
                # by field name
                metadata_query = {
                    'token': api_key,
                    'content': 'metadata',
                    'format': 'json'
                }

                content = post_to_redcap(api_url, metadata_query, _debug_tup)
                metadata = json.loads(content)
                field_names = []
                for field in metadata:
                    if field['identifier'] != 'y':
                        field_names.append(field['field_name'])
                record_query['fields'] = ','.join(field_names)

            # post query to redcap
            content = post_to_redcap(api_url, record_query, _debug_tup)

            # check if response body is nothing but a sad empty array
            if content.strip() == '[]':
                logger.info(f'no redcap data for {redcap_subject}')
                continue

            if not dry:
                if not os.path.exists(dst):
                    logger.debug(f'saving {dst}')
                    lochness.atomic_write(dst, content)
                    process_and_copy_db(Lochness, subject, dst, proc_dst)
                    # update_study_metadata(subject, json.loads(content))

                else:
                    # responses are not stored atomically in redcap
                    crc_src = lochness.crc32(content.decode('utf-8'))
                    crc_dst = lochness.crc32file(dst)

                    if crc_dst != crc_src:
                        print('different - crc32: downloading data')
                        logger.warn(f'file has changed {dst}')
                        lochness.backup(dst)
                        logger.debug(f'saving {dst}')
                        lochness.atomic_write(dst, content)
                        process_and_copy_db(Lochness, subject, dst, proc_dst)
                        # update_study_metadata(subject, json.loads(content))
                    else:
                        print('it is the same file (crc32). '
                              'Not saving the data')
                        # update the dst file's mtime to prevent the same
                        # file from being pulled from REDCap again
                        os.utime(dst)
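
check_if_modified compares the data entry trigger table against the previously saved file. A sketch assuming db_df carries 'record' and 'timestamp' columns (both hypothetical column names; the real table may differ):

from pathlib import Path

def check_if_modified(redcap_subject, dst, db_df):
    # return True when the most recent trigger for this record is newer than
    # the previously saved file, or when no trigger information exists
    rows = db_df[db_df['record'] == redcap_subject]
    if rows.empty:
        return True
    return rows['timestamp'].max() > Path(dst).stat().st_mtime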
Example #11
def sync(Lochness, subject, dry=False):
    '''
    Sync beiwe data
    '''
    logger.debug('exploring {0}/{1}'.format(subject.study, subject.id))
    backfill_start = Lochness['beiwe'].get('backfill_start',
                                           BACKFILL_START_FALLBACK)
    if str(backfill_start).strip().lower() == 'consent':
        backfill_start = subject.consent
    for alias, beiwe_uids in iter(subject.beiwe.items()):
        logger.debug('getting {0} from keyring'.format(alias))
        Keyring = mano.keyring(alias,
                               keyring_file=Lochness['keyring_file'],
                               passphrase=config.load.passphrase)
        base_url = Keyring['URL']
        for uid in beiwe_uids:
            study_frag, beiwe_id = uid
            study_name, study_id = mano.expand_study_id(Keyring, study_frag)
            study_name_enc = study_name.encode('utf-8')
            dst_general_folder = tree.get('phone',
                                          subject.general_folder,
                                          beiwe_id=beiwe_id,
                                          BIDS=Lochness['BIDS'])
            dst_protected_folder = tree.get('phone',
                                            subject.protected_folder,
                                            beiwe_id=beiwe_id,
                                            BIDS=Lochness['BIDS'])
            # save a hidden file with the study id, original name, and sanitized name
            save_study_file(dst_general_folder, study_id, study_name)
            protected_streams = set(PROTECT)
            general_streams = set(mano.DATA_STREAMS) - protected_streams
            # begin backfill download of all GENERAL data streams
            logger.info(
                'backfill general streams for subject={0}, study={1}, url={2}'.
                format(beiwe_id, study_name_enc, base_url))
            mano.sync.backfill(Keyring,
                               study_id,
                               beiwe_id,
                               os.path.dirname(dst_general_folder),
                               start_date=str(backfill_start),
                               data_streams=general_streams)
            # begin backfill download of all PROTECTED data streams
            passphrase = Lochness['keyring']['lochness']['SECRETS'].get(
                subject.study, None)
            logger.info(
                'backfill protected streams for subject={0}, study={1}, url={2}'
                .format(beiwe_id, study_name_enc, base_url))
            mano.sync.backfill(Keyring,
                               study_id,
                               beiwe_id,
                               os.path.dirname(dst_protected_folder),
                               start_date=str(backfill_start),
                               data_streams=protected_streams,
                               lock=protected_streams,
                               passphrase=passphrase)
            # begin delta download of all GENERAL data streams
            registry = None
            registry_file = os.path.join(dst_general_folder, '.registry')
            if os.path.exists(registry_file):
                logger.debug(
                    'reading in registry file {0}'.format(registry_file))
                with open(registry_file) as fo:
                    registry = fo.read()
            else:
                logger.warn(
                    'no registry file on disk {0}'.format(registry_file))
            logger.info(
                'delta download of general data streams for subject={0}, study={1}, url={2}'
                .format(beiwe_id, study_name_enc, base_url))
            archive = mano.sync.download(Keyring,
                                         study_id,
                                         beiwe_id,
                                         data_streams=general_streams,
                                         registry=registry)
            mano.sync.save(Keyring, archive, beiwe_id,
                           os.path.dirname(dst_general_folder))
            # begin delta download of all PROTECTED streams
            registry = None
            registry_file = os.path.join(dst_protected_folder, '.registry')
            if os.path.exists(registry_file):
                logger.debug('reading registry file {0}'.format(registry_file))
                with open(registry_file) as fo:
                    registry = fo.read()
            else:
                logger.warn(
                    'no registry file on disk {0}'.format(registry_file))
            logger.info(
                'delta download of protected data streams for subject={0}, study={1}, url={2}'
                .format(beiwe_id, study_name_enc, base_url))
            archive = mano.sync.download(Keyring,
                                         study_id,
                                         beiwe_id,
                                         data_streams=protected_streams,
                                         registry=registry)
            mano.sync.save(Keyring,
                           archive,
                           beiwe_id,
                           os.path.dirname(dst_protected_folder),
                           lock=protected_streams,
                           passphrase=passphrase)
Example #12
def sync_module(Lochness: 'lochness.config', subject: 'subject.metadata',
                study_name: 'mediaflux.study_name', dry: bool):
    '''sync mediaflux data for the subject'''

    if dry:
        raise NotImplementedError('--dry option is not implemented')

    study_basename = study_name.split('.')[1]

    for mf_subid in subject.mediaflux[study_name]:
        logger.debug(f'exploring {subject.study}/{subject.id}')
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = enc.kdf(_passphrase)

        mflux_cfg = keyring.mediaflux_api_token(Lochness, study_name)

        mf_base = base(Lochness, study_basename)

        print(mf_base)

        for datatype, products in \
            iter(Lochness['mediaflux'][study_basename]['file_patterns'].items()):

            print(datatype, products)
            '''
            file_patterns:
                actigraphy:
                    - vendor: Philips
                      product: Actiwatch 2
                      data_dir: all_BWH_actigraphy
                      pattern: 'accel/*csv'
                      protect: True
                    - vendor: Activinsights
                      product: GENEActiv
                      data_dir: all_BWH_actigraphy
                      pattern: 'GENEActiv/*bin,GENEActiv/*csv'
            
            '''

            for prod in products:
                for patt in prod['pattern'].split(','):

                    # consider the case with space
                    # pattern: 'GENEActiv/*bin, GENEActiv/*csv'
                    patt = patt.strip()

                    if '*' not in patt:
                        raise PatternError(
                            'Mediaflux pattern must include an asterisk e.g. *csv or GENEActiv/*csv'
                        )

                    # construct mediaflux remote dir
                    mf_remote_pattern = pjoin(mf_base, prod['data_dir'],
                                              mf_subid, patt)
                    mf_remote_dir = dirname(mf_remote_pattern)

                    # obtain mediaflux remote paths
                    with tempfile.TemporaryDirectory() as tmpdir:
                        diff_path = pjoin(tmpdir, 'diff.csv')
                        cmd = (' ').join([
                            'unimelb-mf-check', '--mf.config', mflux_cfg,
                            '--nb-retries 5', '--direction down', tmpdir,
                            mf_remote_dir, '-o', diff_path
                        ])

                        p = Popen(cmd, shell=True)
                        p.wait()

                        # ENH
                        # if dry: exit()

                        if not isfile(diff_path):
                            continue

                        df = pd.read_csv(diff_path)
                        for remote in df['SRC_PATH'].values:

                            if pd.isna(remote):
                                continue

                            if not re.search(patt.replace('*', '(.+?)'),
                                             remote):
                                continue
                            else:
                                remote = remote.split(':')[1]

                            # construct local path
                            protect = prod.get('protect', True)
                            processed = prod.get('processed', False)
                            key = enc_key if protect else None
                            subj_dir = subject.protected_folder \
                                if protect else subject.general_folder

                            # mf_local= pjoin(subj_dir, datatype, dirname(patt), basename(remote))
                            mf_local = str(
                                tree.get(datatype,
                                         subj_dir,
                                         processed=processed,
                                         BIDS=Lochness['BIDS']))

                            # ENH set different permissions
                            # GENERAL: 0o755, PROTECTED: 0700
                            os.makedirs(mf_local, exist_ok=True)

                            # subprocess call unimelb-mf-download
                            cmd = (' ').join([
                                'unimelb-mf-download', '--mf.config',
                                mflux_cfg, '-o', mf_local, '--nb-retries 5',
                                f'\"{remote}\"'
                            ])

                            p = Popen(cmd, shell=True)
                            p.wait()

                            # verify checksum after download completes
                            # if checksum does not match, data will be downloaded again
                            # ENH should we verify checksum 5 times?
                            cmd += ' --csum-check'
                            p = Popen(cmd, shell=True)
                            p.wait()
Example #13
def sync(Lochness: 'lochness.config',
         subject: 'subject.metadata',
         dry: bool = False):
    '''Sync mindlamp data

    To do:
    - Currently the mindlamp participant id is set by mindlamp when the
      participant object is created. The API can list all participant ids,
      but there is no mapping of which id corresponds to which subject.
    - This information has to be added to the metadata.csv file.
    - Add ApiExceptions
    '''
    logger.debug(f'exploring {subject.study}/{subject.id}')
    deidentify = deidentify_flag(Lochness, subject.study)
    logger.debug(f'deidentify for study {subject.study} is {deidentify}')

    # get keyring for mindlamp
    api_url, access_key, secret_key = mindlamp_projects(
        Lochness, subject.mindlamp)

    # connect to mindlamp API sdk
    # LAMP.connect(access_key, secret_key, api_url)
    LAMP.connect(access_key, secret_key)

    # Extra information for future version
    # study_id, study_name = get_study_lamp(LAMP)
    # subject_ids = get_participants_lamp(LAMP, study_id)

    subject_id = subject.mindlamp[f'mindlamp.{subject.study}'][0]

    # pull data from mindlamp
    activity_dicts = get_activity_events_lamp(LAMP, subject_id)
    sensor_dicts = get_sensor_events_lamp(LAMP, subject_id)

    # set destination folder
    # dst_folder = tree.get('mindlamp', subject.general_folder)
    dst_folder = tree.get('mindlamp',
                          subject.protected_folder,
                          processed=False,
                          BIDS=Lochness['BIDS'])

    # store both data types
    for data_name, data_dict in zip(['activity', 'sensor'],
                                    [activity_dicts, sensor_dicts]):
        dst = os.path.join(dst_folder,
                           f'{subject_id}_{subject.study}_{data_name}.json')

        jsonData = json.dumps(data_dict,
                              sort_keys=True,
                              indent='\t',
                              separators=(',', ': '))

        content = jsonData.encode()

        if not Path(dst).is_file():
            lochness.atomic_write(dst, content)
        else:  # compare existing json to the new json
            crc_src = lochness.crc32(content.decode('utf-8'))
            crc_dst = lochness.crc32file(dst)
            if crc_dst != crc_src:
                logger.warn(f'file has changed {dst}')
                lochness.backup(dst)
                logger.debug(f'saving {dst}')
                lochness.atomic_write(dst, content)
Example #14
def sync_module(Lochness: 'lochness.config', subject: 'subject.metadata',
                module_name: 'box.module_name', dry: bool):
    '''Sync box data for the subject'''

    # only the module_name string without 'box.'
    module_basename = module_name.split('.')[1]

    # delete on success
    delete = delete_on_success(Lochness, module_basename)
    logger.debug(f'delete_on_success for {module_basename} is {delete}')

    for bx_sid in subject.box[module_name]:
        logger.debug(f'exploring {subject.study}/{subject.id}')
        _passphrase = keyring.passphrase(Lochness, subject.study)
        enc_key = enc.kdf(_passphrase)

        client_id, client_secret, api_token = keyring.box_api_token(
            Lochness, module_name)

        # box authentication
        auth = OAuth2(
            client_id=client_id,
            client_secret=client_secret,
            access_token=api_token,
        )
        client = Client(auth)

        bx_base = base(Lochness, module_basename)

        # get the id of the bx_base path in box
        bx_base_obj = get_box_object_based_on_name(client, bx_base, '0')

        if bx_base_obj is None:
            logger.debug('Root of the box is not found')
            continue

        # loop through the items defined for the BOX data
        for datatype, products in iter(
                Lochness['box'][module_basename]['file_patterns'].items()):
            subject_obj = get_box_object_based_on_name(client, bx_sid,
                                                       bx_base_obj.id)

            if subject_obj is None:
                logger.debug(f'{bx_sid} is not found under {bx_base_obj}')
                continue

            datatype_obj = get_box_object_based_on_name(
                client, datatype, subject_obj.id)

            # full path
            bx_head = join(bx_base, datatype, bx_sid)

            logger.debug('walking %s', bx_head)

            # if the directory is empty
            if datatype_obj is None:
                continue

            # walk through the root directory
            for root, dirs, files in walk_from_folder_object(
                    bx_head, datatype_obj):

                for box_file_object in files:
                    bx_tail = join(basename(root), box_file_object.name)
                    product = _find_product(bx_tail, products, subject=bx_sid)
                    if not product:
                        continue

                    protect = product.get('protect', False)
                    output_base = subject.protected_folder \
                                  if protect else subject.general_folder

                    encrypt = product.get('encrypt', False)
                    key = enc_key if encrypt else None

                    processed = product.get('processed', False)

                    # For DPACC, get processed from the config.yml
                    output_base = tree.get(datatype,
                                           output_base,
                                           processed=processed,
                                           BIDS=Lochness['BIDS'])

                    compress = product.get('compress', False)

                    save(box_file_object, (root, box_file_object.name),
                         output_base,
                         key=key,
                         compress=compress,
                         delete=delete,
                         dry=dry)
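
get_box_object_based_on_name resolves an item by name under a parent folder id. A sketch against the boxsdk client, assuming a simple name match over the parent folder's items (the real helper may page or filter differently):

def get_box_object_based_on_name(client, name, parent_folder_id):
    # scan the parent folder's items and return the first one whose name
    # matches, or None when nothing matches
    for item in client.folder(parent_folder_id).get_items():
        if item.name == name:
            return item
    return None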